metakgp · chirag-ghosh · Sep 11, 2023 · Sep 5, 2023 · Sep 7, 2023 · Sep 7, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,14 +1,11 @@
 .env
 chillzone
 t
-schedule.json
-subjectDetails.json
-all_subjects.json
-empty_schedule.json
 .idea
 .vscode/
 *.xlsx
 frontend/node_modules/
 frontend/coverage/
 frontend/build/
-first_year_scraper/__pycache__
+__pycache__/
+other-years-scraper/other-years-scraper
diff --git a/first-year-scraper/aut2023.pdf b/first-year-scraper/aut2023.pdf
diff --git a/first_year_scraper/excel_parser.py → first-year-scraper/excel_parser.py b/first_year_scraper/excel_parser.py → first-year-scraper/excel_parser.py
@@ -1,4 +1,4 @@
-import camelot
+import camelot.io as camelot
 
 def parse_pdf(filename, target_filename):
     print("Started processing pdf file. This might take a while...")
@@ -7,4 +7,4 @@ def parse_pdf(filename, target_filename):
     print("Done parsing pdf. exported as: ", target_filename)
 
 if __name__ == "__main__":
-    parse_pdf("spr2019.pdf","test.xlsx")
+    parse_pdf("test.pdf","test.xlsx")
diff --git a/first-year-scraper/generate_schedule.py b/first-year-scraper/generate_schedule.py
@@ -0,0 +1,65 @@
+from openpyxl import load_workbook
+import generate_subjectDetails
+import json
+
+
+def sanitise_sheets(valid_sheets, workbook):
+    '''Trim every listed sheet in place: drop its two leading columns, then its two leading rows.'''
+    for name in valid_sheets:
+        target = workbook[name]
+        # Columns go first, rows second; two of each, starting at index 1.
+        target.delete_cols(1, 2)
+        target.delete_rows(1, 2)
+
+def getkey(a, b):
+    '''Build the excel cell reference for column number a (1 -> "A") and row b, e.g. (2, 3) -> "B3".'''
+    column_letter = chr(ord('A') + a - 1)
+    row_label = str(b)
+    return column_letter + row_label
+
+def format_cell(original_value):
+    '''Split raw cell text into its non-empty tokens.
+
+    The text is broken on newlines, spaces and commas, and empty pieces are
+    discarded. Callers treat the first token as the subject and the
+    remaining tokens as room numbers.
+
+    Returns an empty list when the cell is empty (None), instead of failing
+    on None.split.
+    '''
+    if original_value is None:
+        return []
+    return [
+        token
+        for line in original_value.split("\n")
+        for word in line.split(" ")
+        for token in word.split(",")
+        if token
+    ]
+
+def generate_schedule(valid_sheets, workbook, subjects_dict):
+    '''Fill the room schedule JSON files from the timetable sheets.
+
+    valid_sheets  -- names of the sheets in workbook to read
+    workbook      -- workbook object indexed by sheet name
+    subjects_dict -- maps the first token of a cell to a list whose first
+                     item is written into the schedule
+
+    Reads and rewrites ../frontend/src/schedule.json and
+    ../frontend/src/empty_schedule.json; the worksheets are modified in
+    memory (columns are deleted). Returns nothing.
+    NOTE(review): a room or subject token that is missing from the loaded
+    data would raise KeyError here -- confirm the inputs always match.
+    '''
+    # Start from the schedules already on disk so existing entries are kept.
+    with open("../frontend/src/schedule.json", "r") as json_data:
+        schedule_dict = json.load(json_data)
+    with open("../frontend/src/empty_schedule.json", "r") as json_data:
+        empty_schedule_dict = json.load(json_data)
+    for sheet in valid_sheets:
+        worksheet = workbook[sheet]
+        # Layout check: L2 should be empty and K2 filled. Otherwise report the sheet
+        # and drop one column at position 5 to compensate
+        # (presumably a stray column produced by the pdf parsing -- TODO confirm).
+        if(worksheet['L2'].value is not None or worksheet['K2'].value is None):
+            print("parsing error in sheet {}".format(sheet))
+            worksheet.delete_cols(5,1)
+        # Always drop one column at position 7 (presumably a break column -- TODO confirm).
+        worksheet.delete_cols(7,1)
+        # i walks rows 3-8 and j walks columns B-J of the sheet
+        # (presumably days and time slots -- TODO confirm).
+        for i in range(0, 6):
+            for j in range(0, 9):
+                cell = worksheet[getkey(j+2, i+3)].value
+                # Only cells containing "NR" or "NC" are processed
+                # (presumably room-number prefixes -- TODO confirm).
+                if cell is not None and ("NR" in cell or "NC" in cell):
+                    cell_value = format_cell(cell)
+                    # First token is the subject key, every later token is treated as a room.
+                    for room in cell_value[1::]:
+                        # Keep an existing entry; only fill slots that are still empty.
+                        if(schedule_dict[room][i][j]==""):
+                            schedule_dict[room][i][j] = subjects_dict[cell_value[0]][0]
+                        # The room appears in this slot, so take it off the free-room list.
+                        if(room in empty_schedule_dict[i][j]):
+                            empty_schedule_dict[i][j].remove(room)
+
+    # Persist both updated structures back to the frontend sources.
+    with open("../frontend/src/schedule.json", "w") as outfile:
+        json.dump(schedule_dict, outfile, indent=2)
+
+    with open("../frontend/src/empty_schedule.json", "w") as outfile:
+        json.dump(empty_schedule_dict, outfile, indent=2)
+
+
+def format_excel(filename, subjects_dict):
+    '''Clean the timetable workbook and feed its sheets into generate_schedule.
+
+    filename      -- path of the .xlsx workbook to load
+    subjects_dict -- subject lookup passed through to generate_schedule
+
+    A sheet counts as a timetable when cell D10 or E10 holds "EAA". Those
+    sheets are trimmed by sanitise_sheets and the result is saved next to the
+    input as "<name>_new.xlsx"; the schedule JSON files are then updated
+    from the in-memory workbook.
+    '''
+    # Function-scope import keeps this block self-contained.
+    import os
+
+    workbook = load_workbook(filename=filename)
+    valid_sheets = [
+        name
+        for name in workbook.sheetnames
+        if (workbook[name]['D10'].value == "EAA" or workbook[name]['E10'].value == "EAA")
+    ]
+
+    print("count of sheets with timetable : {}".format(len(valid_sheets)))
+
+    sanitise_sheets(valid_sheets, workbook)
+    # splitext only strips the extension, so paths such as "./workbook.xlsx" or
+    # "../data/workbook.xlsx" keep their directory part (split('.')[0] did not).
+    base_path, _extension = os.path.splitext(filename)
+    workbook.save(base_path + "_new.xlsx")
+
+    generate_schedule(valid_sheets, workbook, subjects_dict)
+
+
+# Script entry point: rebuild the subject lookup first, then process "workbook.xlsx" with it.
+if __name__=="__main__":
+    format_excel("workbook.xlsx", generate_subjectDetails.generate_subjectDetails())    
diff --git a/first-year-scraper/generate_subjectDetails.py b/first-year-scraper/generate_subjectDetails.py
@@ -0,0 +1,46 @@
+from openpyxl import load_workbook
+import json
+
+def get_cell_value(a, b):
+    '''Look up column number a (1 -> "A") and row b in the module-level sheet and return that cell's value.'''
+    column_letter = chr(ord('A') + a - 1)
+    cell = sheet[column_letter + str(b)]
+    return cell.value
+
+def generate_subjectDetails():
+    '''Build the subject lookup from workbook.xlsx and merge it into subjectDetails.json.
+
+    Reads rows 4-18 of the third sheet of "workbook.xlsx". Column D is used
+    as the subject identifier, column E as the subject name, and the third
+    character of column F decides whether a tutorial entry is added
+    (presumably an L-T-P style string -- TODO confirm). Rows whose name
+    contains "Laboratory" are ignored, and rows with an empty cell in any of
+    the three columns are skipped.
+
+    Returns a dict mapping the first two characters of each identifier
+    (plus "(T)" for tutorials) to [identifier, upper-cased name].
+
+    Side effects: rebinds the module-level sheet and rewrites
+    ../frontend/src/subjectDetails.json with the new entries merged in.
+    '''
+    workbook = load_workbook("workbook.xlsx")
+    # get_cell_value() reads the module-level sheet, so publish it before any lookup.
+    global sheet
+    sheet = workbook[workbook.sheetnames[2]]
+
+    subjects_dict = {}
+
+    for row in range(4, 19):
+        code = get_cell_value(4, row)
+        name = get_cell_value(5, row)
+        ltp = get_cell_value(6, row)
+        # A table shorter than 15 rows leaves empty cells; skip those rows
+        # instead of failing on None with a TypeError.
+        if code is None or name is None or ltp is None:
+            continue
+        if "Laboratory" in name:
+            continue
+
+        short_code = code[0:2]
+        subjects_dict[short_code] = [code, name.upper()]
+        # Any third character other than "0" also yields a "(T)" tutorial entry.
+        if ltp[2:3] != "0":
+            subjects_dict[short_code + "(T)"] = [
+                code + "(T)",
+                name.upper() + " TUTORIAL",
+            ]
+
+    with open("../frontend/src/subjectDetails.json", "r") as json_data:
+        subject_details = json.load(json_data)
+
+    # Merge into the existing file contents; entries with the same identifier are overwritten.
+    for full_code, title in subjects_dict.values():
+        subject_details[full_code] = title
+
+    with open("../frontend/src/subjectDetails.json", "w") as outfile:
+        json.dump(subject_details, outfile, indent=2)
+
+    return subjects_dict
+
+
+# Allow running this module on its own to refresh subjectDetails.json only.
+if __name__=="__main__":
+    generate_subjectDetails()    
diff --git a/first-year-scraper/main.py b/first-year-scraper/main.py
@@ -0,0 +1,10 @@
+import excel_parser
+import generate_subjectDetails
+import generate_schedule
+
+# Intermediate workbook that the parsed pdf is exported to.
+target_filename = "workbook.xlsx"
+
+# Ask for the source timetable pdf.
+filename = input("\nEnter the first year timetable pdf file name: ")
+
+# Convert the pdf into the workbook, then build the subject lookup and the schedule from it.
+# NOTE(review): generate_subjectDetails() opens "workbook.xlsx" by a hard-coded name,
+# so target_filename has to stay in sync with it.
+excel_parser.parse_pdf(filename, target_filename)
+generate_schedule.format_excel(target_filename, generate_subjectDetails.generate_subjectDetails())
diff --git a/first-year-scraper/test.pdf b/first-year-scraper/test.pdf
diff --git a/first-year.csv b/first-year.csv
diff --git a/first-year.csv.template b/first-year.csv.template
diff --git a/first_year_scraper/aut2019.pdf b/first_year_scraper/aut2019.pdf
diff --git a/first_year_scraper/excel_formatter.py b/first_year_scraper/excel_formatter.py
diff --git a/first_year_scraper/first_year.csv b/first_year_scraper/first_year.csv
diff --git a/first_year_scraper/main.py b/first_year_scraper/main.py