From 4d997568b9b4b207c11eb7c25f485b7d46fac542 Mon Sep 17 00:00:00 2001
From: Ahmed Sheta
Date: Sat, 3 Feb 2024 02:19:09 +0100
Subject: [PATCH 1/8] initial commit to fix the file names bug

Signed-off-by: Ahmed Sheta
---
 src/database/leads/local_repository.py | 4 +-
 src/database/leads/s3_repository.py | 4 +-
 src/demo/demos.py | 72 ++++++++++++++++++++++++--
 src/preprocessing/preprocessing.py | 58 +++++++++++----------
 4 files changed, 107 insertions(+), 31 deletions(-)

diff --git a/src/database/leads/local_repository.py b/src/database/leads/local_repository.py
index ebeb90b..c5e53e4 100644
--- a/src/database/leads/local_repository.py
+++ b/src/database/leads/local_repository.py
@@ -249,7 +249,9 @@ def save_classification_report(self, report, model_name: str):
         except Exception as e:
             log.error(f"Could not save report at {report_file_path}! Error: {str(e)}")
 
-    def load_preprocessed_data(self, file_name: str = "preprocessed_data.csv"):
+    def load_preprocessed_data(
+        self, file_name: str = "historical_preprocessed_data.csv"
+    ):
         try:
             return pd.read_csv(os.path.join(self.DF_PREPROCESSED_INPUT, file_name))
         except FileNotFoundError:
diff --git a/src/database/leads/s3_repository.py b/src/database/leads/s3_repository.py
index 4264ef4..2e11ed5 100644
--- a/src/database/leads/s3_repository.py
+++ b/src/database/leads/s3_repository.py
@@ -374,7 +374,9 @@ def save_classification_report(self, report, model_name: str):
         except Exception as e:
             log.error(f"Could not save report for '{model_name}' to S3: {str(e)}")
 
-    def load_preprocessed_data(self, file_name: str = "preprocessed_data.csv"):
+    def load_preprocessed_data(
+        self, file_name: str = "historical_preprocessed_data.csv"
+    ):
         file_path = self.DF_PREPROCESSED_INPUT + file_name
         if not file_path.startswith("s3://"):
             log.error(
diff --git a/src/demo/demos.py b/src/demo/demos.py
index c4de78d..41d3054 100644
--- a/src/demo/demos.py
+++ b/src/demo/demos.py
@@ -10,6 +10,7 @@
 import re
 import subprocess
 
+import pandas as pd
 import xgboost as xgb
 from sklearn.metrics import classification_report
 
@@ -40,6 +41,7 @@
 INPUT_FILE_BDC = "../data/sumup_leads_email.csv"
 OUTPUT_FILE_BDC = "../data/collected_data.json"
 
+
 # evp demo
 def evp_demo():
     data = get_database().load_preprocessed_data()
@@ -212,19 +214,71 @@ def pipeline_demo():
 
 
 def preprocessing_demo():
-    if get_yes_no_input("Filter out the API-irrelevant data? (y/n)"):
+    if get_yes_no_input("Filter out the API-irrelevant data? (y/n)\n"):
         filter_bool = True
     else:
         filter_bool = False
     if get_yes_no_input(
-        "Run on historical data ? (y/n)\nNote: DATABASE_TYPE should be S3!"
+        "Run on historical data ? (y/n)\n'n' means it will run on lead data!\n"
     ):
         historical_bool = True
     else:
         historical_bool = False
+    if get_yes_no_input("Run on S3? (y/n)\n'n' means it will run locally!\n"):
+        S3_bool = True
+    else:
+        S3_bool = False
+
     preprocessor = Preprocessing(
         filter_null_data=filter_bool, historical_data=historical_bool
     )
+    if historical_bool and S3_bool:
+        preprocessor.data_path = (
+            "s3://amos--data--events/historical_data/100k_historic_enriched.csv"
+        )
+        preprocessor.prerocessed_data_output_path = "s3://amos--data--features/preprocessed_data_files/historical_preprocessed_data.csv"
+    elif historical_bool and not S3_bool:
+        # input path
+        input_path_components = preprocessor.data_path.split(
+            "\\" if "\\" in preprocessor.data_path else "/"
+        )
+        input_path_components.pop()
+        input_path_components.append("100k_historic_enriched.csv")
+        input_path = "/".join(input_path_components)
+        preprocessor.data_path = input_path
+
+        # output path
+        path_components = preprocessor.data_path.split(
+            "\\" if "\\" in preprocessor.data_path else "/"
+        )
+        path_components.pop()
+        path_components.append(
+            "preprocessed_data_files/historical_preprocessed_data.csv"
+        )
+        preprocessor.prerocessed_data_output_path = "/".join(path_components)
+    elif not historical_bool and S3_bool:
+        preprocessor.data_path = "s3://amos--data--events/leads/enriched.csv"
+        preprocessor.prerocessed_data_output_path = "s3://amos--data--features/preprocessed_data_files/leads_preprocessed_data.csv"
+    elif not historical_bool and not S3_bool:
+        # input path
+        input_path_components = preprocessor.data_path.split(
+            "\\" if "\\" in preprocessor.data_path else "/"
+        )
+        input_path_components.pop()
+        input_path_components.append("leads_enriched.csv")
+        input_path = "/".join(input_path_components)
+        preprocessor.data_path = input_path
+
+        # output path
+        path_components = preprocessor.data_path.split(
+            "\\" if "\\" in preprocessor.data_path else "/"
+        )
+        path_components.pop()
+        path_components.append("preprocessed_data_files/leads_preprocessed_data.csv")
+        preprocessor.prerocessed_data_output_path = "/".join(path_components)
+
+    preprocessor.preprocessed_df = pd.read_csv(preprocessor.data_path)
+
     df = preprocessor.implement_preprocessing_pipeline()
     preprocessor.save_preprocessed_data()
@@ -250,7 +304,18 @@ def predict_MerchantSize_on_lead_data_demo():
     from preprocessing import Preprocessing
 
     preprocessor = Preprocessing(filter_null_data=False, historical_data=False)
-    leads_enriched_path = "s3://amos--data--events/leads/enriched.csv"
+
+    leads_enriched_path = "s3://amos--data--events/leads/enriched.csv"  # S3 path
+
+    # # input path
+    # input_path_components = preprocessor.data_path.split(
+    #     "\\" if "\\" in preprocessor.data_path else "/"
+    # )
+    # input_path_components.pop()
+    # input_path_components.append("leads_enriched.csv")
+    # input_path = "/".join(input_path_components)  # local path
+    # preprocessor.data_path = input_path
+
     if not leads_enriched_path:
         log.error(
             "No such file exists in the directory s3://amos--data--events/leads/enriched.csv"
@@ -259,6 +324,7 @@ def predict_MerchantSize_on_lead_data_demo():
     preprocessor.data_path = leads_enriched_path
     preprocessor.prerocessed_data_output_path = (
         "s3://amos--data--events/leads/preprocessed_leads_data.csv"
     )
+    preprocessor.preprocessed_df = pd.read_csv(leads_enriched_path)
 
     df = preprocessor.implement_preprocessing_pipeline()
     preprocessor.save_preprocessed_data()
diff --git a/src/preprocessing/preprocessing.py b/src/preprocessing/preprocessing.py
index 78f7c06..f47510b 100644
--- a/src/preprocessing/preprocessing.py
+++ b/src/preprocessing/preprocessing.py
@@ -32,31 +32,34 @@ class Preprocessing:
     def __init__(self, filter_null_data=True, historical_data=False):
        data_repo = get_database()
         self.data_path = data_repo.get_output_path()
-        if historical_data:
-            input_path_components = self.data_path.split(
-                "\\" if "\\" in self.data_path else "/"
-            )
-            input_path_components.pop()
-            input_path_components.pop()
-            input_path_components.append("historical_data/100k_historic_enriched.csv")
-            input_path = "/".join(input_path_components)
-            data = pd.read_csv(input_path)
-            log.debug(f"Data path = {input_path}")
-        else:
-            log.debug(f"Data path = {self.data_path}")
-            data = pd.read_csv(self.data_path)
-        self.preprocessed_df = data.copy()
-
-        if historical_data:
-            self.prerocessed_data_output_path = "s3://amos--data--features/preprocessed_data_files/preprocessed_data.csv"
-        else:
-            # created the new output path based on which repo used
-            path_components = self.data_path.split(
-                "\\" if "\\" in self.data_path else "/"
-            )
-            path_components.pop()
-            path_components.append("preprocessed_data.csv")
-            self.prerocessed_data_output_path = "/".join(path_components)
+        self.preprocessed_df = None
+        self.prerocessed_data_output_path = None
+        # if historical_data:
+        #     input_path_components = self.data_path.split(
+        #         "\\" if "\\" in self.data_path else "/"
+        #     )
+        #     input_path_components.pop()
+        #     input_path_components.pop()
+        #     input_path_components.append("historical_data/100k_historic_enriched.csv")
+        #     input_path = "/".join(input_path_components)
+        #     data = pd.read_csv(input_path)
+        #     log.debug(f"Data path = {input_path}")
+        #     self.preprocessed_df = data.copy()
+        # else:
+        #     log.debug(f"Data path = {self.data_path}")
+        #     data = pd.read_csv(self.data_path)
+        #     self.preprocessed_df = data.copy()
+
+        # if historical_data:
+        #     self.prerocessed_data_output_path = "s3://amos--data--features/preprocessed_data_files/preprocessed_data.csv"
+        # else:
+        #     # created the new output path based on which repo used
+        #     path_components = self.data_path.split(
+        #         "\\" if "\\" in self.data_path else "/"
+        #     )
+        #     path_components.pop()
+        #     path_components.append("preprocessed_data_files/preprocessed_data.csv")
+        #     self.prerocessed_data_output_path = "/".join(path_components)
         self.filter_bool = filter_null_data
 
         # columns that would be added later after one-hot encoding each class
@@ -114,7 +117,10 @@ def filter_out_null_data(self):
         ]
 
     def fill_missing_values(self, column, strategy="constant"):
-        if column in self.preprocessed_df.columns:
+        if (
+            column in self.preprocessed_df.columns
+            and not self.preprocessed_df[column].empty
+        ):
             imputer = SimpleImputer(strategy=strategy)
             self.preprocessed_df[column] = imputer.fit_transform(
                 self.preprocessed_df[[column]]

From 93ba51cb6336d5cdb95bca82865820357105deac Mon Sep 17 00:00:00 2001
From: Ahmed Sheta
Date: Sat, 3 Feb 2024 05:21:10 +0100
Subject: [PATCH 2/8] fixed bug and now the pipeline can run locally

Signed-off-by: Ahmed Sheta
---
 src/demo/demos.py | 151 +++++++++++++----------------
 src/preprocessing/preprocessing.py | 74 ++++++++------
 2 files changed, 113 insertions(+), 112 deletions(-)

diff --git a/src/demo/demos.py b/src/demo/demos.py
index 41d3054..83723b2 100644
--- a/src/demo/demos.py
+++ b/src/demo/demos.py
@@ -230,52 +230,8 @@ def preprocessing_demo():
         S3_bool = False
 
     preprocessor = Preprocessing(
-        filter_null_data=filter_bool, historical_data=historical_bool
+        filter_null_data=filter_bool, historical_bool=historical_bool, S3_bool=S3_bool
     )
-    if historical_bool and S3_bool:
-        preprocessor.data_path = (
-            "s3://amos--data--events/historical_data/100k_historic_enriched.csv"
-        )
-        preprocessor.prerocessed_data_output_path = 
"s3://amos--data--features/preprocessed_data_files/historical_preprocessed_data.csv" - elif historical_bool and not S3_bool: - # input path - input_path_components = preprocessor.data_path.split( - "\\" if "\\" in preprocessor.data_path else "/" - ) - input_path_components.pop() - input_path_components.append("100k_historic_enriched.csv") - input_path = "/".join(input_path_components) - preprocessor.data_path = input_path - - # output path - path_components = preprocessor.data_path.split( - "\\" if "\\" in preprocessor.data_path else "/" - ) - path_components.pop() - path_components.append( - "preprocessed_data_files/historical_preprocessed_data.csv" - ) - preprocessor.prerocessed_data_output_path = "/".join(path_components) - elif not historical_bool and S3_bool: - preprocessor.data_path = "s3://amos--data--events/leads/enriched.csv" - preprocessor.prerocessed_data_output_path = "s3://amos--data--features/preprocessed_data_files/leads_preprocessed_data.csv" - elif not historical_bool and not S3_bool: - # input path - input_path_components = preprocessor.data_path.split( - "\\" if "\\" in preprocessor.data_path else "/" - ) - input_path_components.pop() - input_path_components.append("leads_enriched.csv") - input_path = "/".join(input_path_components) - preprocessor.data_path = input_path - - # output path - path_components = preprocessor.data_path.split( - "\\" if "\\" in preprocessor.data_path else "/" - ) - path_components.pop() - path_components.append("preprocessed_data_files/leads_preprocessed_data.csv") - preprocessor.prerocessed_data_output_path = "/".join(path_components) preprocessor.preprocessed_df = pd.read_csv(preprocessor.data_path) @@ -294,37 +250,23 @@ def predict_MerchantSize_on_lead_data_demo(): import pandas as pd log.info( - "Note: Enriched data must be located at s3://amos--data--events/leads/enriched.csv" + "Note: In case of running locally, enriched data must be located at src/data/leads_enriched.csv locally\nIn case of running on S3, enriched data must be located at s3://amos--data--events/leads/enriched.csv or" ) ######################### preprocessing the leads ################################## + if get_yes_no_input("Run on S3? 
(y/n)\n'n' means it will run locally!\n"): + S3_bool = True + else: + S3_bool = False current_dir = os.path.dirname(__file__) if "__file__" in locals() else os.getcwd() parent_dir = os.path.join(current_dir, "..") sys.path.append(parent_dir) from preprocessing import Preprocessing - preprocessor = Preprocessing(filter_null_data=False, historical_data=False) - - leads_enriched_path = "s3://amos--data--events/leads/enriched.csv" # S3 path - - # # input path - # input_path_components = preprocessor.data_path.split( - # "\\" if "\\" in preprocessor.data_path else "/" - # ) - # input_path_components.pop() - # input_path_components.append("leads_enriched.csv") - # input_path = "/".join(input_path_components) # local path - # preprocessor.data_path = input_path - - if not leads_enriched_path: - log.error( - "No such file exists in the directory s3://amos--data--events/leads/enriched.csv" - ) - preprocessor.data_path = leads_enriched_path - preprocessor.prerocessed_data_output_path = ( - "s3://amos--data--events/leads/preprocessed_leads_data.csv" + preprocessor = Preprocessing( + filter_null_data=False, historical_bool=False, S3_bool=S3_bool ) - preprocessor.preprocessed_df = pd.read_csv(leads_enriched_path) + preprocessor.preprocessed_df = pd.read_csv(preprocessor.data_path) df = preprocessor.implement_preprocessing_pipeline() preprocessor.save_preprocessed_data() @@ -333,9 +275,18 @@ def predict_MerchantSize_on_lead_data_demo(): historical_preprocessed_data = pd.read_csv( "s3://amos--data--features/preprocessed_data_files/preprocessed_data.csv" ) - toBePredicted_preprocessed_data = pd.read_csv( - "s3://amos--data--events/leads/preprocessed_leads_data.csv" - ) + if S3_bool: + toBePredicted_preprocessed_data = pd.read_csv( + "s3://amos--data--events/leads/preprocessed_leads_data.csv" + ) + else: + path_components = preprocessor.data_path.split( + "\\" if "\\" in preprocessor.data_path else "/" + ) + path_components.pop() + path_components.append("preprocessed_data_files/leads_preprocessed_data.csv") + leads_preprocessed_data_path = "/".join(path_components) + toBePredicted_preprocessed_data = pd.read_csv(leads_preprocessed_data_path) historical_columns_order = historical_preprocessed_data.columns @@ -355,11 +306,21 @@ def predict_MerchantSize_on_lead_data_demo(): toBePredicted_preprocessed_data = toBePredicted_preprocessed_data[ historical_columns_order ] - - toBePredicted_preprocessed_data.to_csv( - "s3://amos--data--events/leads/toBePredicted_preprocessed_data_updated.csv", - index=False, - ) + if S3_bool: + toBePredicted_preprocessed_data.to_csv( + "s3://amos--data--events/leads/toBePredicted_preprocessed_data_updated.csv", + index=False, + ) + else: + path_components = preprocessor.data_path.split( + "\\" if "\\" in preprocessor.data_path else "/" + ) + path_components.pop() + path_components.append("toBePredicted_preprocessed_data_updated.csv") + local_preprocessed_data_path = "/".join(path_components) + toBePredicted_preprocessed_data.to_csv( + local_preprocessed_data_path, index=False + ) # check if columns in both dataframe are in same order and same number assert list(toBePredicted_preprocessed_data.columns) == list( @@ -403,9 +364,13 @@ def check_classification_task(string): model = joblib.load(model_file) log.info(f"Loaded the model sucessfully!") - data_path = ( - "s3://amos--data--events/leads/toBePredicted_preprocessed_data_updated.csv" - ) + if S3_bool: + data_path = ( + "s3://amos--data--events/leads/toBePredicted_preprocessed_data_updated.csv" + ) + else: + data_path = 
local_preprocessed_data_path + df = pd.read_csv(data_path) input = df.drop("MerchantSizeByDPV", axis=1) if xgb_bool: @@ -418,15 +383,31 @@ def check_classification_task(string): size_mapping = {0: "XS", 1: "S", 2: "M", 3: "L", 4: "XL"} remapped_predictions = [size_mapping[prediction] for prediction in predictions] - enriched_data = pd.read_csv("s3://amos--data--events/leads/enriched.csv") + if S3_bool: + enriched_data = pd.read_csv("s3://amos--data--events/leads/enriched.csv") + else: + enriched_data = pd.read_csv(preprocessor.data_path) # first 5 columns: Last Name,First Name,Company / Account,Phone,Email, raw_data = enriched_data.iloc[:, :5] + print(f"raw_data = {raw_data.shape}") + print(f"remapped_predictions = {len(remapped_predictions)}") raw_data["PredictedMerchantSize"] = remapped_predictions - raw_data.to_csv( - "s3://amos--data--events/leads/predicted_MerchantSize_of_leads.csv", index=True - ) - log.info( - f"Saved the predicted Merchant Size of the leads at s3://amos--data--events/leads/predicted_MerchantSize_of_leads.csv" - ) + if S3_bool: + raw_data.to_csv( + "s3://amos--data--events/leads/predicted_MerchantSize_of_leads.csv", + index=True, + ) + log.info( + f"Saved the predicted Merchant Size of the leads at s3://amos--data--events/leads/predicted_MerchantSize_of_leads.csv" + ) + else: + path_components = preprocessor.data_path.split( + "\\" if "\\" in preprocessor.data_path else "/" + ) + path_components.pop() + path_components.append("predicted_MerchantSize_of_leads.csv") + output_path = "/".join(path_components) + raw_data.to_csv(output_path, index=True) + log.info(f"Saved the predicted Merchant Size of the leads at {output_path}") diff --git a/src/preprocessing/preprocessing.py b/src/preprocessing/preprocessing.py index f47510b..a278b2a 100644 --- a/src/preprocessing/preprocessing.py +++ b/src/preprocessing/preprocessing.py @@ -29,37 +29,57 @@ class Preprocessing: - def __init__(self, filter_null_data=True, historical_data=False): + def __init__(self, filter_null_data=True, historical_bool=True, S3_bool=False): data_repo = get_database() self.data_path = data_repo.get_output_path() self.preprocessed_df = None self.prerocessed_data_output_path = None - # if historical_data: - # input_path_components = self.data_path.split( - # "\\" if "\\" in self.data_path else "/" - # ) - # input_path_components.pop() - # input_path_components.pop() - # input_path_components.append("historical_data/100k_historic_enriched.csv") - # input_path = "/".join(input_path_components) - # data = pd.read_csv(input_path) - # log.debug(f"Data path = {input_path}") - # self.preprocessed_df = data.copy() - # else: - # log.debug(f"Data path = {self.data_path}") - # data = pd.read_csv(self.data_path) - # self.preprocessed_df = data.copy() - - # if historical_data: - # self.prerocessed_data_output_path = "s3://amos--data--features/preprocessed_data_files/preprocessed_data.csv" - # else: - # # created the new output path based on which repo used - # path_components = self.data_path.split( - # "\\" if "\\" in self.data_path else "/" - # ) - # path_components.pop() - # path_components.append("preprocessed_data_files/preprocessed_data.csv") - # self.prerocessed_data_output_path = "/".join(path_components) + if historical_bool and S3_bool: + self.data_path = ( + "s3://amos--data--events/historical_data/100k_historic_enriched.csv" + ) + self.prerocessed_data_output_path = "s3://amos--data--features/preprocessed_data_files/historical_preprocessed_data.csv" + elif historical_bool and not S3_bool: + # input path + 
input_path_components = self.data_path.split( + "\\" if "\\" in self.data_path else "/" + ) + input_path_components.pop() + input_path_components.append("100k_historic_enriched.csv") + input_path = "/".join(input_path_components) + self.data_path = input_path + + # output path + path_components = self.data_path.split( + "\\" if "\\" in self.data_path else "/" + ) + path_components.pop() + path_components.append( + "preprocessed_data_files/historical_preprocessed_data.csv" + ) + self.prerocessed_data_output_path = "/".join(path_components) + elif not historical_bool and S3_bool: + self.data_path = "s3://amos--data--events/leads/enriched.csv" + self.prerocessed_data_output_path = "s3://amos--data--features/preprocessed_data_files/leads_preprocessed_data.csv" + elif not historical_bool and not S3_bool: + # input path + input_path_components = self.data_path.split( + "\\" if "\\" in self.data_path else "/" + ) + input_path_components.pop() + input_path_components.append("leads_enriched.csv") + input_path = "/".join(input_path_components) + self.data_path = input_path + + # output path + path_components = self.data_path.split( + "\\" if "\\" in self.data_path else "/" + ) + path_components.pop() + path_components.append( + "preprocessed_data_files/leads_preprocessed_data.csv" + ) + self.prerocessed_data_output_path = "/".join(path_components) self.filter_bool = filter_null_data # columns that would be added later after one-hot encoding each class From aa8ced84ae0501678be7172956c577a24bc2b0d2 Mon Sep 17 00:00:00 2001 From: Ahmed Sheta Date: Sat, 3 Feb 2024 22:29:48 +0100 Subject: [PATCH 3/8] updated pipfile, removed debugging prints, added logs Signed-off-by: Ahmed Sheta --- Pipfile.lock | 430 ++++++++++++++++++++-------------------------- src/demo/demos.py | 16 +- 2 files changed, 204 insertions(+), 242 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index f6b083a..d3f4ad6 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9e3d29b16e3d34d5c059c059728a9a36510bd8554044aa224a482cc910d553c1" + "sha256": "8d74161673d9b82cb7933149388452406f1efaf7a82db95bfd11997ef8b36d33" }, "pipfile-spec": 6, "requires": { @@ -122,14 +122,6 @@ "markers": "python_version >= '3.7'", "version": "==1.3.1" }, - "alabaster": { - "hashes": [ - "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", - "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92" - ], - "markers": "python_version >= '3.9'", - "version": "==0.7.16" - }, "annotated-types": { "hashes": [ "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43", @@ -169,14 +161,6 @@ "index": "pypi", "version": "==2.6.1" }, - "babel": { - "hashes": [ - "sha256:6919867db036398ba21eb5c7a0f6b28ab8cbc3ae7a73a44ebe34ae74a4e7d363", - "sha256:efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287" - ], - "markers": "python_version >= '3.7'", - "version": "==2.14.0" - }, "beautifulsoup4": { "hashes": [ "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da", @@ -327,7 +311,7 @@ "sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27", "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2' and python_version < '4'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4'", "version": "==0.7.2" }, "colorama": { @@ -517,14 +501,6 @@ "markers": "python_version >= '3.8'", 
"version": "==2.5.0" }, - "docutils": { - "hashes": [ - "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", - "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b" - ], - "markers": "python_version >= '3.7'", - "version": "==0.20.1" - }, "email-validator": { "hashes": [ "sha256:a4b0bd1cf55f073b924258d19321b1f3aa74b4b5a71a42c305575dba920e1a44", @@ -736,14 +712,6 @@ "markers": "python_version >= '3.5'", "version": "==3.6" }, - "imagesize": { - "hashes": [ - "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", - "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.4.1" - }, "jinja2": { "hashes": [ "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa", @@ -888,14 +856,6 @@ "markers": "python_version >= '3.8' and python_version < '4.0'", "version": "==2.0.1" }, - "markdown-it-py": { - "hashes": [ - "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", - "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb" - ], - "markers": "python_version >= '3.8'", - "version": "==3.0.0" - }, "markupsafe": { "hashes": [ "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", @@ -962,22 +922,6 @@ "markers": "python_version >= '3.7'", "version": "==2.1.5" }, - "mdit-py-plugins": { - "hashes": [ - "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9", - "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b" - ], - "markers": "python_version >= '3.8'", - "version": "==0.4.0" - }, - "mdurl": { - "hashes": [ - "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", - "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba" - ], - "markers": "python_version >= '3.7'", - "version": "==0.1.2" - }, "more-itertools": { "hashes": [ "sha256:1bc4f91ee5b1b31ac7ceacc17c09befe6a40a503907baf9c839c229b5095cfd2", @@ -1089,14 +1033,6 @@ "markers": "python_version >= '3.7'", "version": "==6.0.5" }, - "myst-parser": { - "hashes": [ - "sha256:7c36344ae39c8e740dad7fdabf5aa6fc4897a813083c6cc9990044eb93656b14", - "sha256:ea929a67a6a0b1683cdbe19b8d2e724cd7643f8aa3e7bb18dd65beac3483bead" - ], - "markers": "python_version >= '3.8'", - "version": "==2.0.0" - }, "networkx": { "hashes": [ "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6", @@ -1481,14 +1417,6 @@ "markers": "python_version >= '3.8'", "version": "==2.16.1" }, - "pygments": { - "hashes": [ - "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c", - "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367" - ], - "markers": "python_version >= '3.7'", - "version": "==2.17.2" - }, "pylanguagetool": { "hashes": [ "sha256:406629d7ed1a78d95499ebebc7f5a4950f714904a8117edb78f89757fcd90fbe", @@ -1531,6 +1459,14 @@ "markers": "python_version >= '3.9'", "version": "==3.6.1" }, + "pyreadline3": { + "hashes": [ + "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae", + "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb" + ], + "markers": "python_version >= '3.8' and sys_platform == 'win32'", + "version": "==3.4.1" + }, "pyspellchecker": { "hashes": [ "sha256:b5ef23437702b8d03626f814b9646779b572d378b325ad252d8a8e616b3d76db", @@ -1545,7 +1481,7 @@ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", 
"sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, "python-dotenv": { @@ -1564,63 +1500,6 @@ ], "version": "==2024.1" }, - "pyyaml": { - "hashes": [ - "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", - "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", - "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", - "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", - "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", - "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", - "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595", - "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", - "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", - "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", - "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", - "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", - "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", - "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", - "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", - "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", - "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", - "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6", - "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", - "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", - "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", - "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", - "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", - "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", - "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", - "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", - "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", - "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", - "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", - "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef", - "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", - "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", - "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", - "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0", - "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515", - "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c", - "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c", - "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924", - "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34", - "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", - "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", - 
"sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", - "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", - "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", - "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", - "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", - "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", - "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", - "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585", - "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d", - "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" - ], - "markers": "python_version >= '3.6'", - "version": "==6.0.1" - }, "regex": { "hashes": [ "sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5", @@ -1880,7 +1759,7 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "sniffio": { @@ -1891,13 +1770,6 @@ "markers": "python_version >= '3.7'", "version": "==1.3.0" }, - "snowballstemmer": { - "hashes": [ - "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", - "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a" - ], - "version": "==2.2.0" - }, "soupsieve": { "hashes": [ "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690", @@ -1906,79 +1778,6 @@ "markers": "python_version >= '3.8'", "version": "==2.5" }, - "sphinx": { - "hashes": [ - "sha256:1e09160a40b956dc623c910118fa636da93bd3ca0b9876a7b3df90f07d691560", - "sha256:9a5160e1ea90688d5963ba09a2dcd8bdd526620edbb65c328728f1b2228d5ab5" - ], - "index": "pypi", - "markers": "python_version >= '3.9'", - "version": "==7.2.6" - }, - "sphinx-rtd-theme": { - "hashes": [ - "sha256:bd5d7b80622406762073a04ef8fadc5f9151261563d47027de09910ce03afe6b", - "sha256:ec93d0856dc280cf3aee9a4c9807c60e027c7f7b461b77aeffed682e68f0e586" - ], - "markers": "python_version >= '3.6'", - "version": "==2.0.0" - }, - "sphinxcontrib-applehelp": { - "hashes": [ - "sha256:c40a4f96f3776c4393d933412053962fac2b84f4c99a7982ba42e09576a70619", - "sha256:cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4" - ], - "markers": "python_version >= '3.9'", - "version": "==1.0.8" - }, - "sphinxcontrib-devhelp": { - "hashes": [ - "sha256:6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f", - "sha256:9893fd3f90506bc4b97bdb977ceb8fbd823989f4316b28c3841ec128544372d3" - ], - "markers": "python_version >= '3.9'", - "version": "==1.0.6" - }, - "sphinxcontrib-htmlhelp": { - "hashes": [ - "sha256:0dc87637d5de53dd5eec3a6a01753b1ccf99494bd756aafecd74b4fa9e729015", - "sha256:393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04" - ], - "markers": "python_version >= '3.9'", - "version": "==2.0.5" - }, - "sphinxcontrib-jquery": { - "hashes": [ - "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a", - "sha256:f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae" - ], - "markers": "python_version >= '2.7'", - "version": "==4.1" - }, - "sphinxcontrib-jsmath": { - "hashes": [ - "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", - 
"sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" - ], - "markers": "python_version >= '3.5'", - "version": "==1.0.1" - }, - "sphinxcontrib-qthelp": { - "hashes": [ - "sha256:053dedc38823a80a7209a80860b16b722e9e0209e32fea98c90e4e6624588ed6", - "sha256:e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182" - ], - "markers": "python_version >= '3.9'", - "version": "==1.0.7" - }, - "sphinxcontrib-serializinghtml": { - "hashes": [ - "sha256:326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7", - "sha256:93f3f5dc458b91b192fe10c397e324f262cf163d79f3282c158e8436a2c4511f" - ], - "markers": "python_version >= '3.9'", - "version": "==1.1.10" - }, "sympy": { "hashes": [ "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5", @@ -2294,6 +2093,14 @@ } }, "develop": { + "alabaster": { + "hashes": [ + "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", + "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92" + ], + "markers": "python_version >= '3.9'", + "version": "==0.7.16" + }, "anyio": { "hashes": [ "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780", @@ -2302,14 +2109,6 @@ "markers": "python_version >= '3.7'", "version": "==3.7.1" }, - "appnope": { - "hashes": [ - "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24", - "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e" - ], - "markers": "platform_system == 'Darwin'", - "version": "==0.1.3" - }, "argon2-cffi": { "hashes": [ "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08", @@ -2571,6 +2370,15 @@ "markers": "python_full_version >= '3.7.0'", "version": "==3.3.2" }, + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + ], + "index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==0.4.6" + }, "comm": { "hashes": [ "sha256:0bc91edae1344d39d3661dcbc36937181fdaddb304790458f8b044dbc064b89a", @@ -2743,6 +2551,14 @@ ], "version": "==0.3.8" }, + "docutils": { + "hashes": [ + "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", + "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b" + ], + "markers": "python_version >= '3.7'", + "version": "==0.20.1" + }, "exceptiongroup": { "hashes": [ "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14", @@ -2871,6 +2687,14 @@ "markers": "python_version >= '3.5'", "version": "==3.6" }, + "imagesize": { + "hashes": [ + "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", + "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.4.1" + }, "iniconfig": { "hashes": [ "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", @@ -3131,6 +2955,14 @@ "markers": "python_version >= '3.7'", "version": "==1.4.5" }, + "markdown-it-py": { + "hashes": [ + "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", + "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb" + ], + "markers": "python_version >= '3.8'", + "version": "==3.0.0" + }, "markupsafe": { "hashes": [ "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", @@ -3248,6 +3080,22 @@ "markers": 
"python_version >= '3.6'", "version": "==0.7.0" }, + "mdit-py-plugins": { + "hashes": [ + "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9", + "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b" + ], + "markers": "python_version >= '3.8'", + "version": "==0.4.0" + }, + "mdurl": { + "hashes": [ + "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", + "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba" + ], + "markers": "python_version >= '3.7'", + "version": "==0.1.2" + }, "mistune": { "hashes": [ "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205", @@ -3256,6 +3104,14 @@ "markers": "python_version >= '3.7'", "version": "==3.0.2" }, + "myst-parser": { + "hashes": [ + "sha256:7c36344ae39c8e740dad7fdabf5aa6fc4897a813083c6cc9990044eb93656b14", + "sha256:ea929a67a6a0b1683cdbe19b8d2e724cd7643f8aa3e7bb18dd65beac3483bead" + ], + "markers": "python_version >= '3.8'", + "version": "==2.0.0" + }, "nbclient": { "hashes": [ "sha256:4b28c207877cf33ef3a9838cdc7a54c5ceff981194a82eac59d558f05487295e", @@ -3384,14 +3240,6 @@ "markers": "python_version >= '3.6'", "version": "==0.8.3" }, - "pexpect": { - "hashes": [ - "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", - "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f" - ], - "markers": "sys_platform != 'win32'", - "version": "==4.9.0" - }, "pillow": { "hashes": [ "sha256:0304004f8067386b477d20a518b50f3fa658a28d44e4116970abfcd94fac34a8", @@ -3538,14 +3386,6 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==5.9.8" }, - "ptyprocess": { - "hashes": [ - "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", - "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" - ], - "markers": "os_name != 'nt'", - "version": "==0.7.0" - }, "pure-eval": { "hashes": [ "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", @@ -3615,7 +3455,7 @@ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, "python-json-logger": { @@ -3626,6 +3466,38 @@ "markers": "python_version >= '3.6'", "version": "==2.0.7" }, + "pywin32": { + "hashes": [ + "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d", + "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65", + "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e", + "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b", + "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4", + "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040", + "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a", + "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36", + "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8", + "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e", + "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802", + "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a", + "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407", + 
"sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0" + ], + "markers": "sys_platform == 'win32' and platform_python_implementation != 'PyPy'", + "version": "==306" + }, + "pywinpty": { + "hashes": [ + "sha256:1617b729999eb6713590e17665052b1a6ae0ad76ee31e60b444147c5b6a35dca", + "sha256:189380469ca143d06e19e19ff3fba0fcefe8b4a8cc942140a6b863aed7eebb2d", + "sha256:21319cd1d7c8844fb2c970fb3a55a3db5543f112ff9cfcd623746b9c47501575", + "sha256:7520575b6546db23e693cbd865db2764097bd6d4ef5dc18c92555904cd62c3d4", + "sha256:8197de460ae8ebb7f5d1701dfa1b5df45b157bb832e92acba316305e18ca00dd", + "sha256:853985a8f48f4731a716653170cd735da36ffbdc79dcb4c7b7140bce11d8c722" + ], + "markers": "os_name == 'nt'", + "version": "==2.0.12" + }, "pyyaml": { "hashes": [ "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", @@ -3941,7 +3813,7 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "sniffio": { @@ -3952,6 +3824,13 @@ "markers": "python_version >= '3.7'", "version": "==1.3.0" }, + "snowballstemmer": { + "hashes": [ + "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", + "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a" + ], + "version": "==2.2.0" + }, "soupsieve": { "hashes": [ "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690", @@ -3960,6 +3839,79 @@ "markers": "python_version >= '3.8'", "version": "==2.5" }, + "sphinx": { + "hashes": [ + "sha256:1e09160a40b956dc623c910118fa636da93bd3ca0b9876a7b3df90f07d691560", + "sha256:9a5160e1ea90688d5963ba09a2dcd8bdd526620edbb65c328728f1b2228d5ab5" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==7.2.6" + }, + "sphinx-rtd-theme": { + "hashes": [ + "sha256:bd5d7b80622406762073a04ef8fadc5f9151261563d47027de09910ce03afe6b", + "sha256:ec93d0856dc280cf3aee9a4c9807c60e027c7f7b461b77aeffed682e68f0e586" + ], + "markers": "python_version >= '3.6'", + "version": "==2.0.0" + }, + "sphinxcontrib-applehelp": { + "hashes": [ + "sha256:c40a4f96f3776c4393d933412053962fac2b84f4c99a7982ba42e09576a70619", + "sha256:cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4" + ], + "markers": "python_version >= '3.9'", + "version": "==1.0.8" + }, + "sphinxcontrib-devhelp": { + "hashes": [ + "sha256:6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f", + "sha256:9893fd3f90506bc4b97bdb977ceb8fbd823989f4316b28c3841ec128544372d3" + ], + "markers": "python_version >= '3.9'", + "version": "==1.0.6" + }, + "sphinxcontrib-htmlhelp": { + "hashes": [ + "sha256:0dc87637d5de53dd5eec3a6a01753b1ccf99494bd756aafecd74b4fa9e729015", + "sha256:393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04" + ], + "markers": "python_version >= '3.9'", + "version": "==2.0.5" + }, + "sphinxcontrib-jquery": { + "hashes": [ + "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a", + "sha256:f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae" + ], + "markers": "python_version >= '2.7'", + "version": "==4.1" + }, + "sphinxcontrib-jsmath": { + "hashes": [ + "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", + "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" + ], + "markers": 
"python_version >= '3.5'", + "version": "==1.0.1" + }, + "sphinxcontrib-qthelp": { + "hashes": [ + "sha256:053dedc38823a80a7209a80860b16b722e9e0209e32fea98c90e4e6624588ed6", + "sha256:e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182" + ], + "markers": "python_version >= '3.9'", + "version": "==1.0.7" + }, + "sphinxcontrib-serializinghtml": { + "hashes": [ + "sha256:326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7", + "sha256:93f3f5dc458b91b192fe10c397e324f262cf163d79f3282c158e8436a2c4511f" + ], + "markers": "python_version >= '3.9'", + "version": "==1.1.10" + }, "stack-data": { "hashes": [ "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", diff --git a/src/demo/demos.py b/src/demo/demos.py index 83723b2..7437aa1 100644 --- a/src/demo/demos.py +++ b/src/demo/demos.py @@ -254,6 +254,7 @@ def predict_MerchantSize_on_lead_data_demo(): ) ######################### preprocessing the leads ################################## + log.info(f"Preprocessing the leads!") if get_yes_no_input("Run on S3? (y/n)\n'n' means it will run locally!\n"): S3_bool = True else: @@ -271,6 +272,7 @@ def predict_MerchantSize_on_lead_data_demo(): preprocessor.save_preprocessed_data() ############################## adapting the preprocessing files ########################### + log.info(f"Adapting the leads' preprocessed data for the ML model!") # load the data from the CSV files historical_preprocessed_data = pd.read_csv( "s3://amos--data--features/preprocessed_data_files/preprocessed_data.csv" @@ -307,10 +309,17 @@ def predict_MerchantSize_on_lead_data_demo(): historical_columns_order ] if S3_bool: + log.info(f"Adapting the leads' preprocessed data for the ML model!") + toBePredicted_output_path_s3 = ( + "s3://amos--data--events/leads/toBePredicted_preprocessed_data_updated.csv" + ) toBePredicted_preprocessed_data.to_csv( - "s3://amos--data--events/leads/toBePredicted_preprocessed_data_updated.csv", + toBePredicted_output_path_s3, index=False, ) + log.info( + f"Saving the adapted preprocessed data at {toBePredicted_output_path_s3}" + ) else: path_components = preprocessor.data_path.split( "\\" if "\\" in preprocessor.data_path else "/" @@ -321,6 +330,9 @@ def predict_MerchantSize_on_lead_data_demo(): toBePredicted_preprocessed_data.to_csv( local_preprocessed_data_path, index=False ) + log.info( + f"Saving the adapted preprocessed data at {local_preprocessed_data_path}" + ) # check if columns in both dataframe are in same order and same number assert list(toBePredicted_preprocessed_data.columns) == list( @@ -390,8 +402,6 @@ def check_classification_task(string): # first 5 columns: Last Name,First Name,Company / Account,Phone,Email, raw_data = enriched_data.iloc[:, :5] - print(f"raw_data = {raw_data.shape}") - print(f"remapped_predictions = {len(remapped_predictions)}") raw_data["PredictedMerchantSize"] = remapped_predictions if S3_bool: From 98dc73f2960703784b878b7905f21286e0fe6cb9 Mon Sep 17 00:00:00 2001 From: Ahmed Sheta Date: Sun, 4 Feb 2024 18:10:37 +0100 Subject: [PATCH 4/8] modified such hat models can be loaded from local path and applied in Merchant Size Prediction Signed-off-by: Ahmed Sheta --- src/demo/demos.py | 48 +++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/demo/demos.py b/src/demo/demos.py index 7437aa1..f5818e5 100644 --- a/src/demo/demos.py +++ b/src/demo/demos.py @@ -254,7 +254,6 @@ def predict_MerchantSize_on_lead_data_demo(): ) ######################### preprocessing the leads 
################################## - log.info(f"Preprocessing the leads!") if get_yes_no_input("Run on S3? (y/n)\n'n' means it will run locally!\n"): S3_bool = True else: @@ -264,6 +263,7 @@ def predict_MerchantSize_on_lead_data_demo(): sys.path.append(parent_dir) from preprocessing import Preprocessing + log.info(f"Preprocessing the leads...") preprocessor = Preprocessing( filter_null_data=False, historical_bool=False, S3_bool=S3_bool ) @@ -272,7 +272,7 @@ def predict_MerchantSize_on_lead_data_demo(): preprocessor.save_preprocessed_data() ############################## adapting the preprocessing files ########################### - log.info(f"Adapting the leads' preprocessed data for the ML model!") + log.info(f"Adapting the leads' preprocessed data for the ML model...") # load the data from the CSV files historical_preprocessed_data = pd.read_csv( "s3://amos--data--features/preprocessed_data_files/preprocessed_data.csv" @@ -309,7 +309,6 @@ def predict_MerchantSize_on_lead_data_demo(): historical_columns_order ] if S3_bool: - log.info(f"Adapting the leads' preprocessed data for the ML model!") toBePredicted_output_path_s3 = ( "s3://amos--data--events/leads/toBePredicted_preprocessed_data_updated.csv" ) @@ -343,9 +342,14 @@ def predict_MerchantSize_on_lead_data_demo(): bucket_name = "amos--models" - model_name = get_string_input( - "Provide model file name in amos--models/models S3 Bucket\nInput example: lightgbm_epochs(1)_f1(0.6375)_numclasses(5)_model.pkl\n" - ) + if S3_bool: + model_name = get_string_input( + "Provide model file name in amos--models/models S3 Bucket\nInput example: lightgbm_epochs(1)_f1(0.6375)_numclasses(5)_model.pkl\n" + ) + else: + model_name = get_string_input( + "Provide model file name in data/models local directory\nInput example: lightgbm_epochs(1)_f1(0.6375)_numclasses(5)_model.pkl\n" + ) # file_key = "models/lightgbm_epochs(1)_f1(0.6375)_numclasses(5)_model_updated.pkl" # adjust according to the desired model model_name = model_name.replace(" ", "") xgb_bool = False @@ -364,17 +368,29 @@ def check_classification_task(string): False classification_task_3 = check_classification_task(file_key) - # create an S3 client - s3 = boto3.client("s3") - - # download the file from S3 - response = s3.get_object(Bucket=bucket_name, Key=file_key) - model_content = response["Body"].read() - # load model - with BytesIO(model_content) as model_file: - model = joblib.load(model_file) - log.info(f"Loaded the model sucessfully!") + try: + if S3_bool: + # create an S3 client + s3 = boto3.client("s3") + # download the file from S3 + response = s3.get_object(Bucket=bucket_name, Key=file_key) + model_content = response["Body"].read() + # load model + with BytesIO(model_content) as model_file: + model = joblib.load(model_file) + log.info(f"Loaded the model from S3 bucket sucessfully!") + else: + path_components = preprocessor.data_path.split( + "\\" if "\\" in preprocessor.data_path else "/" + ) + path_components.pop() + path_components.append(file_key) + model_local_path = "/".join(path_components) + model = joblib.load(model_local_path) + log.info(f"Loaded the model from the local path sucessfully!") + except: + log.error("No model found with the given name!") if S3_bool: data_path = ( From af8a7e2f72ee3fb66513841da600f063f0b25e74 Mon Sep 17 00:00:00 2001 From: Ahmed Sheta Date: Sun, 4 Feb 2024 19:38:48 +0100 Subject: [PATCH 5/8] modifications after review Signed-off-by: Ahmed Sheta --- src/demo/demos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/demo/demos.py b/src/demo/demos.py index f5818e5..b7a1481 100644 --- a/src/demo/demos.py +++ b/src/demo/demos.py @@ -250,7 +250,7 @@ def predict_MerchantSize_on_lead_data_demo(): import pandas as pd log.info( - "Note: In case of running locally, enriched data must be located at src/data/leads_enriched.csv locally\nIn case of running on S3, enriched data must be located at s3://amos--data--events/leads/enriched.csv or" + "Note: In case of running locally, enriched data must be located at src/data/leads_enriched.csv\nIn case of running on S3, enriched data must be located at s3://amos--data--events/leads/enriched.csv" ) ######################### preprocessing the leads ################################## From b27c0b5567e5e38c699ead45c482d668f7820e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucca=20Baumg=C3=A4rtner?= Date: Mon, 5 Feb 2024 11:03:15 +0100 Subject: [PATCH 6/8] quick fix: make sure db type is respected in preprocessing step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lucca Baumgärtner --- src/data/preprocessed_data_files/.gitkeep | 0 src/demo/demos.py | 8 ++------ 2 files changed, 2 insertions(+), 6 deletions(-) create mode 100644 src/data/preprocessed_data_files/.gitkeep diff --git a/src/data/preprocessed_data_files/.gitkeep b/src/data/preprocessed_data_files/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/demo/demos.py b/src/demo/demos.py index b7a1481..3b0bdc7 100644 --- a/src/demo/demos.py +++ b/src/demo/demos.py @@ -8,13 +8,13 @@ import re -import subprocess import pandas as pd import xgboost as xgb from sklearn.metrics import classification_report from bdc.pipeline import Pipeline +from config import DATABASE_TYPE from database import get_database from demo.console_utils import ( get_int_input, @@ -241,7 +241,6 @@ def preprocessing_demo(): def predict_MerchantSize_on_lead_data_demo(): import os - import pickle import sys from io import BytesIO @@ -254,10 +253,7 @@ def predict_MerchantSize_on_lead_data_demo(): ) ######################### preprocessing the leads ################################## - if get_yes_no_input("Run on S3? 
(y/n)\n'n' means it will run locally!\n"): - S3_bool = True - else: - S3_bool = False + S3_bool = DATABASE_TYPE == "S3" current_dir = os.path.dirname(__file__) if "__file__" in locals() else os.getcwd() parent_dir = os.path.join(current_dir, "..") sys.path.append(parent_dir) From 5dad860f5224a0c6084acaae09945106a12e43ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucca=20Baumg=C3=A4rtner?= Date: Mon, 5 Feb 2024 11:42:08 +0100 Subject: [PATCH 7/8] add folder for models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lucca Baumgärtner --- .gitignore | 3 ++- src/data/models/.gitkeep | 0 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 src/data/models/.gitkeep diff --git a/.gitignore b/.gitignore index c346f48..e746e53 100644 --- a/.gitignore +++ b/.gitignore @@ -53,7 +53,8 @@ bin/ !**/data/merged_geo.geojson **/data/reviews/*.json **/data/gpt-results/*.json -**/data/models/* +**/data/models/*.pkl +**/data/models/*.joblib **/data/classification_reports/* **/docs/* diff --git a/src/data/models/.gitkeep b/src/data/models/.gitkeep new file mode 100644 index 0000000..e69de29 From c718b1bdd043f0537c89629f0729797f04d5ea47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucca=20Baumg=C3=A4rtner?= Date: Mon, 5 Feb 2024 11:53:50 +0100 Subject: [PATCH 8/8] return to menu if invalid model name is given MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lucca Baumgärtner --- src/demo/demos.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/demo/demos.py b/src/demo/demos.py index 3b0bdc7..2371f1d 100644 --- a/src/demo/demos.py +++ b/src/demo/demos.py @@ -387,6 +387,7 @@ def check_classification_task(string): log.info(f"Loaded the model from the local path sucessfully!") except: log.error("No model found with the given name!") + return if S3_bool: data_path = (