From b7d76ce905f48f7672522da64f6918cd27c85f3e Mon Sep 17 00:00:00 2001 From: ShahramNasir Date: Sat, 20 Mar 2021 20:54:27 -0400 Subject: [PATCH 1/4] New Aggregate --- covid_nlp/language/detect_language.py | 35 +++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/covid_nlp/language/detect_language.py b/covid_nlp/language/detect_language.py index 746a4dd..d3c10e6 100644 --- a/covid_nlp/language/detect_language.py +++ b/covid_nlp/language/detect_language.py @@ -6,7 +6,31 @@ from time import time import requests +class Algo(): + def setAlgo(): + algorithm = 'HMAC+SHA1' + return algorithm + + def setTime(): + time = curr_time = str(int(time())) + return time + + def setConcat(): + concat = curr_time+os.environ.get('SIL_API_KEY') + return concat + + def setConcatB(concat): + concatB = (concat).encode('utf-8') + return concatB + + def setSecretB(): + secretB = os.environ.get('SIL_API_SECRET').encode('utf-8') + return secretB + + class LanguageDetector(): + Algo algorithm + def __init__(self, model = 'sil'): self.model = model @@ -20,11 +44,12 @@ def detect_lang_cld3(self, text): return pred.language, 100*pred.probability def detect_lang_sil(self, text): - algorithm = 'HMAC+SHA1' - curr_time = str(int(time())) - concat = curr_time+os.environ.get('SIL_API_KEY') - concatB = (concat).encode('utf-8') - secretB = os.environ.get('SIL_API_SECRET').encode('utf-8') + #setting these to an algorithm class + algorithm = Algo.setAlgo() + curr_time = Algo.setTime() + concat = Algo.setConcat() + concatB = Algo.setConcatB() + secretB = Algo.setSecretB() h1 = hmac.new(secretB, concatB, sha1) api_sig = h1.hexdigest() params = {'api_key': os.environ.get('SIL_API_KEY'), 'api_sig': api_sig} From bbbaf4f1d6382aab207c935c86cc7268c4d06bb2 Mon Sep 17 00:00:00 2001 From: ShahramNasir Date: Sat, 20 Mar 2021 20:57:41 -0400 Subject: [PATCH 2/4] New Aggregate --- covid_nlp/language/detect_language.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/covid_nlp/language/detect_language.py b/covid_nlp/language/detect_language.py index d3c10e6..243089f 100644 --- a/covid_nlp/language/detect_language.py +++ b/covid_nlp/language/detect_language.py @@ -29,7 +29,6 @@ def setSecretB(): class LanguageDetector(): - Algo algorithm def __init__(self, model = 'sil'): self.model = model @@ -44,12 +43,13 @@ def detect_lang_cld3(self, text): return pred.language, 100*pred.probability def detect_lang_sil(self, text): + yes = Algo() #setting these to an algorithm class - algorithm = Algo.setAlgo() - curr_time = Algo.setTime() - concat = Algo.setConcat() - concatB = Algo.setConcatB() - secretB = Algo.setSecretB() + algorithm = yes.setAlgo() + curr_time = yes.setTime() + concat = yes.setConcat() + concatB = yes.setConcatB() + secretB = yes.setSecretB() h1 = hmac.new(secretB, concatB, sha1) api_sig = h1.hexdigest() params = {'api_key': os.environ.get('SIL_API_KEY'), 'api_sig': api_sig} From e3ab823bd308dfb8301fc66c529e0c48c27b9268 Mon Sep 17 00:00:00 2001 From: ShahramNasir Date: Sat, 20 Mar 2021 21:01:13 -0400 Subject: [PATCH 3/4] New Aggregate --- covid_nlp/language/detect_language.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/covid_nlp/language/detect_language.py b/covid_nlp/language/detect_language.py index 243089f..c3096c9 100644 --- a/covid_nlp/language/detect_language.py +++ b/covid_nlp/language/detect_language.py @@ -3,19 +3,20 @@ import pycld2 as cld2 import hmac from hashlib import sha1 +from datetime import datetime from time import time import requests class Algo(): - def setAlgo(): + def setAlgo(self): algorithm = 'HMAC+SHA1' return algorithm - def setTime(): - time = curr_time = str(int(time())) + def setTime(self): + time = datetime.now() return time - def setConcat(): + def setConcat(curr_time): concat = curr_time+os.environ.get('SIL_API_KEY') return concat @@ -23,7 +24,7 @@ def setConcatB(concat): concatB = (concat).encode('utf-8') return concatB - def setSecretB(): + def setSecretB(self): secretB = os.environ.get('SIL_API_SECRET').encode('utf-8') return secretB @@ -43,11 +44,11 @@ def detect_lang_cld3(self, text): return pred.language, 100*pred.probability def detect_lang_sil(self, text): - yes = Algo() + yes = Algo() #setting these to an algorithm class algorithm = yes.setAlgo() curr_time = yes.setTime() - concat = yes.setConcat() + concat = yes.setConcat(curr_time) concatB = yes.setConcatB() secretB = yes.setSecretB() h1 = hmac.new(secretB, concatB, sha1) From 911bf11175d31c5b1a6e10c8e75660233f425f76 Mon Sep 17 00:00:00 2001 From: ShahramNasir Date: Wed, 14 Apr 2021 22:36:42 -0400 Subject: [PATCH 4/4] Singleton applied to eval.py along with updating the references to eval_question_similarity where I could find it --- covid_nlp/eval.py | 21 +++++++++++++++++-- covid_nlp/modeling/tfidf/tfidf_client.py | 5 +++-- .../transformer/eval_pretrained_haystack.py | 3 ++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/covid_nlp/eval.py b/covid_nlp/eval.py index 31ed2c0..436d70a 100644 --- a/covid_nlp/eval.py +++ b/covid_nlp/eval.py @@ -4,6 +4,23 @@ from sklearn.metrics import roc_auc_score, f1_score from farm.utils import MLFlowLogger +class SingletonEval(type): + + #create a metaclass + #overall idea retrieved from StackOverflow https://stackoverflow.com/questions/6760685/creating-a-singleton-in-python + _instances = {} + + def getInstance (*args,**kwargs, theClass): + + if theClass not in theClass.instances: + + instance = super().getInstance(*args,**kwargs) + theClass._instances[theClass] = instance + + return theClass._instances[theClass] + + +class Singleton(metaclass = SingletonEval): def eval_question_similarity(y_true, y_pred, lang, model_name, params, user=None, log_to_mlflow=True, run_name="default"): # basic metrics @@ -44,7 +61,7 @@ def eval_question_similarity(y_true, y_pred, lang, model_name, params, user=None y_pred = [0.5] * len(y_true) # eval & track results - eval_question_similarity(y_true=y_true, y_pred=y_pred, lang=lang, model_name=model_name, - params=params, user="malte", log_to_mlflow=log_to_mlflow, run_name=experiment_name) + ourSingleton = SingletonEval() + ourSingleton.eval_question_similarity(y_true=y_true, y_pred=y_pred, lang=lang, model_name=model_name, params=params, user="malte", log_to_mlflow=log_to_mlflow, run_name=experiment_name) diff --git a/covid_nlp/modeling/tfidf/tfidf_client.py b/covid_nlp/modeling/tfidf/tfidf_client.py index f93c42b..d8d87b9 100644 --- a/covid_nlp/modeling/tfidf/tfidf_client.py +++ b/covid_nlp/modeling/tfidf/tfidf_client.py @@ -2,6 +2,7 @@ import re import pickle import os +import eval import json from sklearn.feature_extraction.text import TfidfVectorizer @@ -13,7 +14,7 @@ from tfidf_train import TfidfTrainer sys.path.insert(0, "./../../") -from eval import eval_question_similarity +from eval import ourSingleton.eval_question_similarity class TfidfEvaluator(): @@ -51,7 +52,7 @@ def main(): exp_name = "tfidf_cos_sim_2" params = {"sp_voc": 16000, "max_ngram": 2, "remove_stopwords": 1, "data_train": "eval, scraped", "data_sp": "eval, scraped, CORD-19.200k"} - eval_question_similarity(y_true=y_true, y_pred=y_pred, lang="en", model_name=model_name, + ourSingleton.eval_question_similarity(y_true=y_true, y_pred=y_pred, lang="en", model_name=model_name, params=params, user="carmen", log_to_mlflow=True, run_name=exp_name) diff --git a/covid_nlp/modeling/transformer/eval_pretrained_haystack.py b/covid_nlp/modeling/transformer/eval_pretrained_haystack.py index 61af521..4120541 100644 --- a/covid_nlp/modeling/transformer/eval_pretrained_haystack.py +++ b/covid_nlp/modeling/transformer/eval_pretrained_haystack.py @@ -1,5 +1,6 @@ import pandas as pd import numpy as np +import eval from sklearn.metrics import roc_auc_score from farm.utils import MLFlowLogger @@ -38,7 +39,7 @@ def eval_pretrained_transformers(eval_file, lang, models, pooling_methods, extra df["pred"] = np.diag(cosine_similarity(res1, res2)) # eval & track results - eval_question_similarity(y_true=y_true, y_pred=df["pred"].values, lang=lang, model_name=model_name, + ourSingleton.eval_question_similarity(y_true=y_true, y_pred=df["pred"].values, lang=lang, model_name=model_name, params=params, user="malte", log_to_mlflow=log_to_mlflow, run_name=experiment_name) if __name__ == "__main__":