DhanushNehru · Retsamic · Feb 9, 2024 · Feb 9, 2024 · Feb 9, 2024 · Feb 9, 2024
diff --git a/PDF Organizer Script/README.md b/PDF Organizer Script/README.md
@@ -0,0 +1,39 @@
+# PDF Organizer Script
+
+### Description
+
+The PDF Organizer is a Python script that helps you manage a collection of PDF files. It reads the metadata (title, author, and page count) of each PDF file in a specified directory and moves the files into subfolders named after their authors. It also renames each PDF to a consistent format ("Author - Title.pdf") and generates a summary report of the folder's contents, listing the number of PDFs found for each author.
+
+## Features
+
+### Metadata Extraction:
+
+Reads metadata from PDF files to get the title, author, and number of pages.
+
+### Dynamic Organization: 
+
+Automatically creates subfolders for different authors and moves the PDFs into these folders.
+
+### File Renaming: 
+
+Renames PDF files following a consistent naming convention for easier identification.
+
+### Summary Report:
+
+Generates a report detailing the organization process, listing the number of PDFs processed for each author.
+
+## Prerequisites
+
+Before you can use the PDF Organizer script, you need to have Python installed on your system. Additionally, the script depends on the PyPDF2 library for reading PDF metadata.
+
+## Installation
+
+1. Install Python
+2. Install PyPDF2 by running the following command: `pip install PyPDF2`
+
+## Usage
+
+1. Place the pdf_organizer.py script in a directory of your choice.
+2. Open a terminal or command prompt and navigate to the directory where the script is located.
+3. Run the script with Python by executing the following command: `python pdf_organizer.py`
+4. Note: before you run the script in step 3, edit the `source_folder` variable in the script so that it points to the directory containing your PDF files, and the `report_path` variable so that it points to where you want the summary report to be saved.
diff --git a/PDF Organizer Script/pdf_organizer.py b/PDF Organizer Script/pdf_organizer.py
@@ -0,0 +1,66 @@
+import os
+import shutil
+import string
+from PyPDF2 import PdfReader
+
+# Helper to read one metadata entry as non-empty text
+def _metadata_text(metadata, key, default):
+    """Return the metadata entry ``key`` as stripped text, or ``default``.
+
+    ``default`` is returned when ``metadata`` is None or empty, when the key
+    is missing, or when the stored value is empty after stripping.
+    """
+    # PdfReader.metadata is None for a PDF without a document-information
+    # dictionary, so guard before calling .get() on it.
+    value = metadata.get(key) if metadata else None
+    if value is None:
+        return default
+    if isinstance(value, bytes):
+        # Byte-string values would otherwise render as "b'...'" via str().
+        value = value.decode('utf-8', errors='replace')
+    text = str(value).strip()
+    return text or default
+
+
+# Function to extract PDF metadata
+def extract_pdf_metadata(pdf_path):
+    """Read the title, author and page count of a PDF file.
+
+    Args:
+        pdf_path: Path of the PDF file to open.
+
+    Returns:
+        A dict with the keys ``'title'``, ``'author'`` and ``'num_pages'``.
+        Missing or empty titles/authors are reported as ``'Unknown Title'``
+        and ``'Unknown Author'``. Returns None (after printing the error)
+        when the file cannot be read.
+    """
+    # Best effort: any failure while parsing is reported and the file is
+    # skipped by returning None, so one bad PDF does not abort the run.
+    try:
+        reader = PdfReader(pdf_path)
+        metadata = reader.metadata
+        num_pages = len(reader.pages)
+        title = _metadata_text(metadata, '/Title', 'Unknown Title')
+        author = _metadata_text(metadata, '/Author', 'Unknown Author')
+        return {'title': title, 'author': author, 'num_pages': num_pages}
+    except Exception as e:
+        print(f"Error reading {pdf_path}: {e}")
+        return None
+
+# Function to organize PDFs into subfolders based on author
+def organize_pdfs_by_author(source_folder):
+    """Move every PDF in ``source_folder`` into a per-author subfolder.
+
+    Each PDF for which ``extract_pdf_metadata`` returns metadata is renamed
+    to "Author - Title.pdf" and moved into a subfolder of ``source_folder``
+    named after the sanitized author. PDFs without usable metadata are left
+    where they are.
+
+    Args:
+        source_folder: Directory whose direct entries are scanned for PDFs.
+
+    Returns:
+        A dict mapping each author, as reported by the metadata, to the
+        number of PDFs moved for that author.
+    """
+    def safe_component(raw, fallback):
+        # A name that sanitizes to nothing, or to dots only ("." / ".."),
+        # would resolve to source_folder itself or its parent, so replace it.
+        cleaned = sanitize_filename(str(raw))
+        return cleaned if cleaned.strip('. ') else fallback
+
+    # Match the extension case-insensitively so "REPORT.PDF" is included;
+    # sort so that numbered duplicates are assigned deterministically.
+    pdf_files = sorted(
+        f for f in os.listdir(source_folder) if f.lower().endswith('.pdf')
+    )
+    summary = {}
+
+    for pdf_file in pdf_files:
+        pdf_path = os.path.join(source_folder, pdf_file)
+        metadata = extract_pdf_metadata(pdf_path)
+        if not metadata:
+            continue
+
+        author_name = safe_component(metadata['author'], 'Unknown Author')
+        title_name = safe_component(metadata['title'], 'Unknown Title')
+
+        author_folder = os.path.join(source_folder, author_name)
+        os.makedirs(author_folder, exist_ok=True)
+
+        base_name = f"{author_name} - {title_name}"
+        new_pdf_path = os.path.join(author_folder, f"{base_name}.pdf")
+
+        # shutil.move may silently overwrite an existing file, so two PDFs
+        # with the same author and title get a numeric suffix instead.
+        duplicate = 1
+        while os.path.exists(new_pdf_path):
+            new_pdf_path = os.path.join(
+                author_folder, f"{base_name} ({duplicate}).pdf"
+            )
+            duplicate += 1
+
+        shutil.move(pdf_path, new_pdf_path)
+
+        summary[metadata['author']] = summary.get(metadata['author'], 0) + 1
+
+    return summary
+
+# Function to sanitize filenames to remove invalid characters
+def sanitize_filename(filename):
+    """Keep only ASCII letters, digits, spaces and the characters ``-_.()``.
+
+    The surviving characters keep their original order, and leading and
+    trailing whitespace is stripped from the result.
+    """
+    allowed = "-_.() " + string.ascii_letters + string.digits
+    kept = filter(allowed.__contains__, filename)
+    return ''.join(kept).strip()
+
+# Function to generate a summary report
+def generate_summary_report(summary, report_path):
+    """Write one "Author: <name>, PDFs: <count>" line per author.
+
+    Args:
+        summary: Mapping of author name to the number of PDFs for that author.
+        report_path: Path of the text file to create or overwrite.
+    """
+    # Author names come from PDF metadata and may contain non-ASCII text;
+    # an explicit UTF-8 encoding avoids UnicodeEncodeError on platforms whose
+    # default encoding cannot represent them.
+    with open(report_path, 'w', encoding='utf-8') as report_file:
+        report_file.writelines(
+            f"Author: {author}, PDFs: {count}\n"
+            for author, count in summary.items()
+        )
+
+# Main function to execute the organizer
+def main(source_folder='/path/to/pdf/folder', report_path='/path/to/summary/report.txt'):
+    """Organize the PDFs in ``source_folder`` and write the summary report.
+
+    Args:
+        source_folder: Directory containing the PDFs to organize. The default
+            is a placeholder path that has to be edited or overridden.
+        report_path: File the per-author summary is written to. The default
+            is a placeholder path as well.
+    """
+    summary = organize_pdfs_by_author(source_folder)
+    generate_summary_report(summary, report_path)
+    print("PDF organization and summary report generation complete.")
+
+if __name__ == "__main__":
+    import sys
+
+    # Optional overrides: python pdf_organizer.py [source_folder [report_path]]
+    # With no arguments this is a plain main() call using the defaults above.
+    main(*sys.argv[1:3])