diff --git a/requirements.txt b/requirements.txt
index 6e1b5dd..98d7149 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@ python-dateutil==2.8.2
 lxml==4.9.2
 cachetools==5.3.0
 black==23.1.0
-flake8==6.0.0
\ No newline at end of file
+flake8==6.0.0
+tenacity==8.2.3
diff --git a/webapp/update.py b/webapp/update.py
index 448768c..5115055 100644
--- a/webapp/update.py
+++ b/webapp/update.py
@@ -1,3 +1,7 @@
+from typing import List, Dict
+
+import tenacity
+
 from webapp.google import Drive, Sheets
 from webapp.spec import Spec
 from webapp.settings import (
@@ -7,6 +11,64 @@
     TMP_SHEET_TITLE,
 )
 
+try:
+    from itertools import batched
+except ImportError:
+    from itertools import islice
+
+    def batched(iterable, n):
+        # batched('ABCDEFG', 3) --> ABC DEF G
+        if n < 1:
+            raise ValueError("n must be at least one")
+        it = iter(iterable)
+        while batch := tuple(islice(it, n)):
+            yield batch
+
+
+def _generate_spec_rows_for_folders(drive: Drive, folders: List[Dict]):
+    for folder in folders:
+        query_doc_files = (
+            f"mimeType = 'application/vnd.google-apps.document' "
+            f"and '{folder['id']}' in parents"
+        )
+        files = drive.get_files(
+            query=query_doc_files,
+            fields=(
+                "id",
+                "name",
+                "createdTime",
+                "modifiedTime",
+                "webViewLink",
+            ),
+        )
+        for file_ in files:
+            try:
+                comments = drive.get_comments(
+                    file_id=file_["id"], fields=("resolved",)
+                )
+                open_comments = [c for c in comments if not c["resolved"]]
+                parsed_doc = Spec(google_drive=drive, document_id=file_["id"])
+            except Exception as e:
+                print(f"Unable to parse document: {file_['name']}", e)
+                continue
+
+            row = [
+                folder["name"],
+                file_["name"],
+                file_["id"],
+                file_["webViewLink"],
+                parsed_doc.metadata["index"],
+                parsed_doc.metadata["title"],
+                parsed_doc.metadata["status"],
+                ", ".join(parsed_doc.metadata["authors"]),
+                parsed_doc.metadata["type"],
+                file_["createdTime"],
+                file_["modifiedTime"],
+                len(comments),
+                len(open_comments),
+            ]
+            yield row
+
 
 def update_sheet() -> None:
     """
@@ -19,10 +81,17 @@ def update_sheet() -> None:
     """
     specs_sheet = sheets.get_sheet_by_title(SPECS_SHEET_TITLE)
     tmp_sheet = sheets.ensure_sheet_by_title(TMP_SHEET_TITLE)
-    sheets.clear(sheet_id=tmp_sheet["properties"]["sheetId"])
+    @tenacity.retry(
+        stop=tenacity.stop_after_attempt(3),
+        wait=tenacity.wait_incrementing(start=0.5, increment=0.8),
+    )
+    def _append_rows(rows):
+        """Helper to retry extending the TMP_SHEET."""
+        return sheets.insert_rows(rows, range=TMP_SHEET_TITLE)
+    sheets.clear(sheet_id=tmp_sheet["properties"]["sheetId"])
 
     # Add headers
-    sheets.insert_rows(
+    _append_rows(
         rows=[
             [
                 "Folder name",
@@ -39,8 +108,7 @@ def update_sheet() -> None:
                 "Number of comments",
                 "Number of open comments",
             ]
-        ],
-        range=TMP_SHEET_TITLE,
+        ]
     )
 
     query_subfolders = (
@@ -49,52 +117,9 @@ def update_sheet() -> None:
     )
     folders = drive.get_files(query=query_subfolders, fields=("id", "name"))
 
-    for folder in folders:
-        query_doc_files = (
-            f"mimeType = 'application/vnd.google-apps.document' "
-            f"and '{folder['id']}' in parents"
-        )
-        files = drive.get_files(
-            query=query_doc_files,
-            fields=(
-                "id",
-                "name",
-                "createdTime",
-                "modifiedTime",
-                "webViewLink",
-            ),
-        )
-        for file in files:
-            try:
-                comments = drive.get_comments(
-                    file_id=file["id"], fields=("resolved",)
-                )
-                open_comments = [c for c in comments if not c["resolved"]]
-
-                parsed_doc = Spec(google_drive=drive, document_id=file["id"])
-            except Exception as e:
-                print(f"Unable to parse document: {file['name']}", e)
-                continue
-
-            row = [
-                folder["name"],
-                file["name"],
-                file["id"],
-                file["webViewLink"],
-                parsed_doc.metadata.get("index"),
-                parsed_doc.metadata.get("title"),
-                parsed_doc.metadata.get("status"),
-                ", ".join(parsed_doc.metadata.get("authors")),
-                parsed_doc.metadata.get("type"),
-                file["createdTime"],
-                file["modifiedTime"],
-                len(comments),
-                len(open_comments),
-            ]
-            sheets.insert_rows(
-                rows=[row],
-                range=TMP_SHEET_TITLE,
-            )
+    # Insert rows in batches of 25, which is a magic number with no science behind it.
+    for rows in batched(_generate_spec_rows_for_folders(drive, folders), 25):
+        _append_rows(rows=rows)
 
     # Rename temporary file as the main one once it contains all the specs
     sheets.update_sheet_name(