Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Creational #157

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions covid_nlp/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@
from sklearn.metrics import roc_auc_score, f1_score
from farm.utils import MLFlowLogger

class SingletonEval(type):
    """Metaclass that turns every class using it into a singleton.

    The first call to a class created with this metaclass constructs the
    instance normally; every later call returns that same object and
    ignores the arguments passed to it.

    Overall idea retrieved from StackOverflow:
    https://stackoverflow.com/questions/6760685/creating-a-singleton-in-python
    """

    # Maps each class built with this metaclass to its one instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the single instance of ``cls``, creating it on first use.

        Args:
            *args: Positional arguments forwarded to the class constructor
                on the first call only.
            **kwargs: Keyword arguments forwarded to the class constructor
                on the first call only.

        Returns:
            The one shared instance of ``cls``.
        """
        # Keyed by class so that each class (including subclasses) gets
        # its own instance instead of sharing the first one ever created.
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]

    def getInstance(cls, *args, **kwargs):
        """Named accessor for the singleton; equivalent to calling ``cls``."""
        return cls(*args, **kwargs)


class Singleton(metaclass = SingletonEval):

def eval_question_similarity(y_true, y_pred, lang, model_name, params, user=None, log_to_mlflow=True, run_name="default"):
# basic metrics
Expand Down Expand Up @@ -44,7 +61,7 @@ def eval_question_similarity(y_true, y_pred, lang, model_name, params, user=None
y_pred = [0.5] * len(y_true)

# eval & track results
eval_question_similarity(y_true=y_true, y_pred=y_pred, lang=lang, model_name=model_name,
params=params, user="malte", log_to_mlflow=log_to_mlflow, run_name=experiment_name)
ourSingleton = SingletonEval()
ourSingleton.eval_question_similarity(y_true=y_true, y_pred=y_pred, lang=lang, model_name=model_name, params=params, user="malte", log_to_mlflow=log_to_mlflow, run_name=experiment_name)


36 changes: 31 additions & 5 deletions covid_nlp/language/detect_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,34 @@
import pycld2 as cld2
import hmac
from hashlib import sha1
from datetime import datetime
from time import time
import requests

class Algo():
    """Builds the individual pieces needed to sign a SIL API request.

    Each method produces one ingredient: the algorithm name, the
    timestamp, the timestamp+key string, and the byte forms of the
    message and the secret.
    """

    def setAlgo(self):
        """Return the name of the signing algorithm."""
        return 'HMAC+SHA1'

    def setTime(self):
        """Return the current Unix time in whole seconds as a string.

        A string is required because the result is concatenated with the
        API key in ``setConcat``.
        """
        return str(int(time()))

    @staticmethod
    def setConcat(curr_time):
        """Return ``curr_time`` followed by the ``SIL_API_KEY`` value.

        Args:
            curr_time: Timestamp string, as produced by ``setTime``.

        Raises:
            TypeError: If ``SIL_API_KEY`` is not set in the environment
                (``os.environ.get`` then returns ``None``).
        """
        # staticmethod: works both as Algo.setConcat(x) and instance.setConcat(x)
        return curr_time + os.environ.get('SIL_API_KEY')

    @staticmethod
    def setConcatB(concat):
        """Return ``concat`` encoded as UTF-8 bytes.

        Args:
            concat: The string produced by ``setConcat``.
        """
        return concat.encode('utf-8')

    def setSecretB(self):
        """Return the ``SIL_API_SECRET`` value encoded as UTF-8 bytes.

        Raises:
            AttributeError: If ``SIL_API_SECRET`` is not set in the
                environment (``os.environ.get`` then returns ``None``).
        """
        return os.environ.get('SIL_API_SECRET').encode('utf-8')


class LanguageDetector():

def __init__(self, model = 'sil'):
self.model = model

Expand All @@ -20,11 +44,13 @@ def detect_lang_cld3(self, text):
return pred.language, 100*pred.probability

def detect_lang_sil(self, text):
algorithm = 'HMAC+SHA1'
curr_time = str(int(time()))
concat = curr_time+os.environ.get('SIL_API_KEY')
concatB = (concat).encode('utf-8')
secretB = os.environ.get('SIL_API_SECRET').encode('utf-8')
yes = Algo()
#setting these to an algorithm class
algorithm = yes.setAlgo()
curr_time = yes.setTime()
concat = yes.setConcat(curr_time)
concatB = yes.setConcatB()
secretB = yes.setSecretB()
h1 = hmac.new(secretB, concatB, sha1)
api_sig = h1.hexdigest()
params = {'api_key': os.environ.get('SIL_API_KEY'), 'api_sig': api_sig}
Expand Down
5 changes: 3 additions & 2 deletions covid_nlp/modeling/tfidf/tfidf_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
import pickle
import os
import eval
import json

from sklearn.feature_extraction.text import TfidfVectorizer
Expand All @@ -13,7 +14,7 @@
from tfidf_train import TfidfTrainer

sys.path.insert(0, "./../../")
from eval import eval_question_similarity
from eval import ourSingleton.eval_question_similarity


class TfidfEvaluator():
Expand Down Expand Up @@ -51,7 +52,7 @@ def main():
exp_name = "tfidf_cos_sim_2"
params = {"sp_voc": 16000, "max_ngram": 2, "remove_stopwords": 1,
"data_train": "eval, scraped", "data_sp": "eval, scraped, CORD-19.200k"}
eval_question_similarity(y_true=y_true, y_pred=y_pred, lang="en", model_name=model_name,
ourSingleton.eval_question_similarity(y_true=y_true, y_pred=y_pred, lang="en", model_name=model_name,
params=params, user="carmen", log_to_mlflow=True, run_name=exp_name)


Expand Down
3 changes: 2 additions & 1 deletion covid_nlp/modeling/transformer/eval_pretrained_haystack.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd
import numpy as np
import eval

from sklearn.metrics import roc_auc_score
from farm.utils import MLFlowLogger
Expand Down Expand Up @@ -38,7 +39,7 @@ def eval_pretrained_transformers(eval_file, lang, models, pooling_methods, extra
df["pred"] = np.diag(cosine_similarity(res1, res2))

# eval & track results
eval_question_similarity(y_true=y_true, y_pred=df["pred"].values, lang=lang, model_name=model_name,
ourSingleton.eval_question_similarity(y_true=y_true, y_pred=df["pred"].values, lang=lang, model_name=model_name,
params=params, user="malte", log_to_mlflow=log_to_mlflow, run_name=experiment_name)

if __name__ == "__main__":
Expand Down