From 21625292dbbd3314df8c911e193dfddfc1afbe4c Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Thu, 3 Oct 2024 11:17:36 -0700 Subject: [PATCH 1/4] Wrap google API execute calls with a 5 attempt retry --- schematic/manifest/generator.py | 72 ++++++++++++++++++----------- schematic/utils/google_api_utils.py | 59 +++++++++++++++++++---- 2 files changed, 95 insertions(+), 36 deletions(-) diff --git a/schematic/manifest/generator.py b/schematic/manifest/generator.py index d954506a5..47acad4b4 100644 --- a/schematic/manifest/generator.py +++ b/schematic/manifest/generator.py @@ -27,6 +27,7 @@ build_service_account_creds, execute_google_api_requests, export_manifest_drive_service, + google_api_execute_wrapper, ) from schematic.utils.schema_utils import ( DisplayLabelType, @@ -190,11 +191,11 @@ def _gdrive_copy_file(self, origin_file_id, copy_title): copied_file = {"name": copy_title} # return new copy sheet ID - return ( + return google_api_execute_wrapper( self.drive_service.files() .copy(fileId=origin_file_id, body=copied_file) - .execute()["id"] - ) + .execute + )["id"] def _create_empty_manifest_spreadsheet(self, title: str) -> str: """ @@ -215,12 +216,11 @@ def _create_empty_manifest_spreadsheet(self, title: str) -> str: else: spreadsheet_body = {"properties": {"title": title}} - spreadsheet_id = ( + spreadsheet_id = google_api_execute_wrapper( self.sheet_service.spreadsheets() .create(body=spreadsheet_body, fields="spreadsheetId") - .execute() - .get("spreadsheetId") - ) + .execute + ).get("spreadsheetId") return spreadsheet_id @@ -265,7 +265,7 @@ def callback(request_id, response, exception): fields="id", ) ) - batch.execute() + google_api_execute_wrapper(batch.execute) def _store_valid_values_as_data_dictionary( self, column_id: int, valid_values: list, spreadsheet_id: str @@ -297,7 +297,7 @@ def _store_valid_values_as_data_dictionary( + str(len(values) + 1) ) valid_values = [{"userEnteredValue": "=" + target_range}] 
- response = ( + response = google_api_execute_wrapper( self.sheet_service.spreadsheets() .values() .update( @@ -306,7 +306,7 @@ def _store_valid_values_as_data_dictionary( valueInputOption="RAW", body=body, ) - .execute() + .execute ) return valid_values @@ -560,15 +560,31 @@ def _gs_add_and_format_columns(self, required_metadata_fields, spreadsheet_id): range = "Sheet1!A1:" + str(end_col_letter) + "1" # adding columns - self.sheet_service.spreadsheets().values().update( - spreadsheetId=spreadsheet_id, range=range, valueInputOption="RAW", body=body - ).execute() + google_api_execute_wrapper( + self.sheet_service.spreadsheets() + .values() + .update( + spreadsheetId=spreadsheet_id, + range=range, + valueInputOption="RAW", + body=body, + ) + .execute + ) # adding columns to 2nd sheet that can be used for storing data validation ranges (this avoids limitations on number of dropdown items in excel and openoffice) range = "Sheet2!A1:" + str(end_col_letter) + "1" - self.sheet_service.spreadsheets().values().update( - spreadsheetId=spreadsheet_id, range=range, valueInputOption="RAW", body=body - ).execute() + google_api_execute_wrapper( + self.sheet_service.spreadsheets() + .values() + .update( + spreadsheetId=spreadsheet_id, + range=range, + valueInputOption="RAW", + body=body, + ) + .execute + ) # format column header row header_format_body = { @@ -612,10 +628,10 @@ def _gs_add_and_format_columns(self, required_metadata_fields, spreadsheet_id): ] } - response = ( + response = google_api_execute_wrapper( self.sheet_service.spreadsheets() .batchUpdate(spreadsheetId=spreadsheet_id, body=header_format_body) - .execute() + .execute ) return response, ordered_metadata_fields @@ -664,13 +680,13 @@ def _gs_add_additional_metadata( "data": data, } - response = ( + response = google_api_execute_wrapper( self.sheet_service.spreadsheets() .values() .batchUpdate( spreadsheetId=spreadsheet_id, body=batch_update_values_request_body ) - .execute() + .execute ) return response @@ 
-765,11 +781,11 @@ def _request_regex_match_vr_formatting( split_rules = validation_rules[0].split(" ") if split_rules[0] == "regex" and split_rules[1] == "match": # Set things up: - ## Extract the regular expression we are validating against. + # Extract the regular expression we are validating against. regular_expression = split_rules[2] - ## Define text color to update to upon correct user entry + # Define text color to update to upon correct user entry text_color = {"red": 0, "green": 0, "blue": 0} - ## Define google sheets regular expression formula + # Define google sheets regular expression formula gs_formula = [ { "userEnteredValue": '=REGEXMATCH(INDIRECT("RC",FALSE), "{}")'.format( @@ -777,11 +793,11 @@ def _request_regex_match_vr_formatting( ) } ] - ## Set validaiton strictness based on user specifications. + # Set validaiton strictness based on user specifications. if split_rules[-1].lower() == "strict": strict = True - ## Create error message for users if they enter value with incorrect formatting + # Create error message for users if they enter value with incorrect formatting input_message = ( f"Values in this column are being validated " f"against the following regular expression ({regular_expression}) " @@ -790,7 +806,7 @@ def _request_regex_match_vr_formatting( ) # Create Requests: - ## Change request to change the text color of the column we are validating to red. + # Change request to change the text color of the column we are validating to red. requests_vr_format_body = self._request_update_base_color( i, color={ @@ -800,10 +816,10 @@ def _request_regex_match_vr_formatting( }, ) - ## Create request to for conditionally formatting user input. + # Create request to for conditionally formatting user input. requests_vr = self._request_regex_vr(gs_formula, i, text_color) - ## Create request to generate data validator. + # Create request to generate data validator. 
requests_data_validation_vr = self._get_column_data_validation_values( spreadsheet_id, valid_values=gs_formula, diff --git a/schematic/utils/google_api_utils.py b/schematic/utils/google_api_utils.py index b705e0419..ffd0ed8e0 100644 --- a/schematic/utils/google_api_utils.py +++ b/schematic/utils/google_api_utils.py @@ -2,14 +2,23 @@ # pylint: disable=logging-fstring-interpolation -import os -import logging import json -from typing import Any, Union, no_type_check, TypedDict +import logging +import os +from typing import Any, Callable, TypedDict, Union, no_type_check import pandas as pd -from googleapiclient.discovery import build, Resource # type: ignore from google.oauth2 import service_account # type: ignore +from googleapiclient.discovery import Resource, build # type: ignore +from googleapiclient.errors import HttpError +from tenacity import ( + retry, + retry_if_exception_type, + stop_after_attempt, + wait_chain, + wait_fixed, +) + from schematic.configuration.configuration import CONFIG logger = logging.getLogger(__name__) @@ -86,10 +95,10 @@ def execute_google_api_requests(service, requests_body, **kwargs) -> Any: and kwargs["service_type"] == "batch_update" ): # execute all requests - response = ( + response = google_api_execute_wrapper( service.spreadsheets() .batchUpdate(spreadsheetId=kwargs["spreadsheet_id"], body=requests_body) - .execute() + .execute ) return response @@ -118,10 +127,10 @@ def export_manifest_drive_service( # use google drive # Pylint seems to have trouble with the google api classes, recognizing their methods - data = ( + data = google_api_execute_wrapper( drive_service.files() # pylint: disable=no-member .export(fileId=spreadsheet_id, mimeType=mime_type) - .execute() + .execute ) # open file and write data @@ -145,3 +154,37 @@ def export_manifest_csv(file_path: str, manifest: Union[pd.DataFrame, str]) -> N manifest.to_csv(file_path, index=False) else: export_manifest_drive_service(manifest, file_path, mime_type="text/csv") + + +def 
raise_final_error(retry_state: Any) -> Any: + """After the final attempt, raise the error. + + Args: + retry_state (Any): retry state object + + Returns: + Any: result of the outcome + """ + return retry_state.outcome.result() + + +@retry( + stop=stop_after_attempt(5), + wait=wait_chain( + *[wait_fixed(1) for i in range(2)] + + [wait_fixed(2) for i in range(2)] + + [wait_fixed(5)] + ), + retry=retry_if_exception_type(HttpError), + retry_error_callback=raise_final_error, +) +def google_api_execute_wrapper(api_function_to_call: Callable[[], Any]) -> Any: + """Retry wrapper for Google API calls, with a backoff strategy. + + Args: + api_function_to_call (Callable[[], Any]): The function to call + + Returns: + Any: The result of the API call + """ + return api_function_to_call() From f7dd107e9f96ff7afac9af1c8a94eaf7fd5d7eba Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Thu, 3 Oct 2024 11:24:44 -0700 Subject: [PATCH 2/4] No retry error callback --- schematic/utils/google_api_utils.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/schematic/utils/google_api_utils.py b/schematic/utils/google_api_utils.py index ffd0ed8e0..e46a1819c 100644 --- a/schematic/utils/google_api_utils.py +++ b/schematic/utils/google_api_utils.py @@ -156,18 +156,6 @@ def export_manifest_csv(file_path: str, manifest: Union[pd.DataFrame, str]) -> N export_manifest_drive_service(manifest, file_path, mime_type="text/csv") -def raise_final_error(retry_state: Any) -> Any: - """After the final attempt, raise the error. 
- - Args: - retry_state (Any): retry state object - - Returns: - Any: result of the outcome - """ - return retry_state.outcome.result() - - @retry( stop=stop_after_attempt(5), wait=wait_chain( @@ -176,7 +164,6 @@ def raise_final_error(retry_state: Any) -> Any: + [wait_fixed(5)] ), retry=retry_if_exception_type(HttpError), - retry_error_callback=raise_final_error, ) def google_api_execute_wrapper(api_function_to_call: Callable[[], Any]) -> Any: """Retry wrapper for Google API calls, with a backoff strategy. From 7d1f03a3f9ee0f281c838df2d5b4b04967871bf1 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Thu, 3 Oct 2024 11:26:43 -0700 Subject: [PATCH 3/4] reraise exception --- schematic/utils/google_api_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/schematic/utils/google_api_utils.py b/schematic/utils/google_api_utils.py index e46a1819c..9f4a3105e 100644 --- a/schematic/utils/google_api_utils.py +++ b/schematic/utils/google_api_utils.py @@ -164,6 +164,7 @@ def export_manifest_csv(file_path: str, manifest: Union[pd.DataFrame, str]) -> N + [wait_fixed(5)] ), retry=retry_if_exception_type(HttpError), + reraise=True, ) def google_api_execute_wrapper(api_function_to_call: Callable[[], Any]) -> Any: """Retry wrapper for Google API calls, with a backoff strategy. 
From 8557b97ec92fcf811d55cfe029e8be1b2c7ba6bb Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Thu, 3 Oct 2024 11:31:02 -0700 Subject: [PATCH 4/4] ignore type for mypy --- schematic/utils/google_api_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schematic/utils/google_api_utils.py b/schematic/utils/google_api_utils.py index 9f4a3105e..6f09c0ea7 100644 --- a/schematic/utils/google_api_utils.py +++ b/schematic/utils/google_api_utils.py @@ -10,7 +10,7 @@ import pandas as pd from google.oauth2 import service_account # type: ignore from googleapiclient.discovery import Resource, build # type: ignore -from googleapiclient.errors import HttpError +from googleapiclient.errors import HttpError # type: ignore from tenacity import ( retry, retry_if_exception_type,