diff --git a/models/phishing-models/phishing-bert-20221115.onnx b/models/phishing-models/phishing-bert-20221115.onnx
deleted file mode 100644
index 7c1e0ec716..0000000000
--- a/models/phishing-models/phishing-bert-20221115.onnx
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:80cc263dd7f9087dd19decfa687614635381540b29ba5aabc4ae1ffa7009e757
-size 438007870
diff --git a/models/phishing-models/phishing-bert-20221115.pt b/models/phishing-models/phishing-bert-20221115.pt
deleted file mode 100644
index b28ef683ba..0000000000
--- a/models/phishing-models/phishing-bert-20221115.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0bca567a4bd840fb4b20b6b4a1fb98d9a5a79b2b2dc959b7b539ee35e10357b6
-size 438040521
diff --git a/models/phishing-models/phishing-bert-20230421.onnx b/models/phishing-models/phishing-bert-20230421.onnx
new file mode 100644
index 0000000000..6365d2bc31
--- /dev/null
+++ b/models/phishing-models/phishing-bert-20230421.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6e89b2890ed8f7d89577f3b204a117f115c90116025280a5b164c55400e8d4
+size 438207850
diff --git a/models/phishing-models/phishing-bert-20230421.pt b/models/phishing-models/phishing-bert-20230421.pt
new file mode 100644
index 0000000000..b27de845fb
--- /dev/null
+++ b/models/phishing-models/phishing-bert-20230421.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e0a4b6454dbf20625161e36337bf75e1175cc7fb9a70af22671379489818482
+size 438049721
diff --git a/models/sid-models/sid-minibert-20211021.onnx b/models/sid-models/sid-minibert-20211021.onnx
deleted file mode 100644
index 0140aa2fa4..0000000000
--- a/models/sid-models/sid-minibert-20211021.onnx
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9a8729582a6784121e78ba4305513c3099314a75ecf51501470d1244555588cc
-size 44720790
diff --git a/models/sid-models/sid-minibert-20211021.pth b/models/sid-models/sid-minibert-20211021.pth
deleted file mode 100644
index 3ce5fdd30a..0000000000
--- a/models/sid-models/sid-minibert-20211021.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d552a23031146cc76952989f0299b6b79090bf0e8998c0c40bb635966b5801e7
-size 44737609
diff --git a/models/sid-models/sid-minibert-20230424.onnx b/models/sid-models/sid-minibert-20230424.onnx
new file mode 100644
index 0000000000..6e28a69796
--- /dev/null
+++ b/models/sid-models/sid-minibert-20230424.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d12f31d6dcebd03dab1824c850ebe928b5f5b76d3e9029e9b160f593c12bea9e
+size 44793456
diff --git a/models/sid-models/sid-minibert-20230424.pt b/models/sid-models/sid-minibert-20230424.pt
new file mode 100644
index 0000000000..c63fdc8124
--- /dev/null
+++ b/models/sid-models/sid-minibert-20230424.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4216cca489d02cfdf7d03f2c8c5f19e52d94de8661957f9449ba289329b6cb0
+size 44743365
diff --git a/models/training-tuning-scripts/log-parsing-models/log-parsing-training.ipynb b/models/training-tuning-scripts/log-parsing-models/log-parsing-training.ipynb
index 74b5f16d2a..6185f737ff 100644
--- a/models/training-tuning-scripts/log-parsing-models/log-parsing-training.ipynb
+++ b/models/training-tuning-scripts/log-parsing-models/log-parsing-training.ipynb
@@ -24,7 +24,9 @@
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"from seqeval.metrics import classification_report,accuracy_score,f1_score\n",
@@ -66,7 +68,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -111,56 +113,59 @@
" \n",
"
\n",
" \n",
- " 257 | \n",
+ " 4655 | \n",
" <NA> | \n",
" <NA> | \n",
- " 158.69.5.181 - - [04/Apr/2018:23:06:49 +0200] ... | \n",
- " 158.69.5.181 | \n",
- " - | \n",
+ " 193.106.31.130 - - [11/Aug/2019:19:54:28 +0200... | \n",
+ " 193.106.31.130 | \n",
" - | \n",
" - | \n",
" - | \n",
- " Other | \n",
- " Other | \n",
- " <NA> | \n",
- " 1.1 | \n",
+ " Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ... | \n",
+ " IE | \n",
+ " Windows | \n",
+ " Vista | \n",
+ " 1.0 | \n",
" POST | \n",
" /administrator/index.php | \n",
- " 4498 | \n",
+ " 4481 | \n",
" 200 | \n",
- " [04/Apr/2018:23:06:49 +0200] | \n",
+ " [11/Aug/2019:19:54:28 +0200] | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " error_level error_message \\\n",
- "257 \n",
+ " error_level error_message \\\n",
+ "4655 \n",
+ "\n",
+ " raw remote_host \\\n",
+ "4655 193.106.31.130 - - [11/Aug/2019:19:54:28 +0200... 193.106.31.130 \n",
"\n",
- " raw remote_host \\\n",
- "257 158.69.5.181 - - [04/Apr/2018:23:06:49 +0200] ... 158.69.5.181 \n",
+ " remote_logname remote_user request_header_referer \\\n",
+ "4655 - - - \n",
"\n",
- " remote_logname remote_user request_header_referer \\\n",
- "257 - - - \n",
+ " request_header_user_agent \\\n",
+ "4655 Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ... \n",
"\n",
- " request_header_user_agent request_header_user_agent__browser__family \\\n",
- "257 - Other \n",
+ " request_header_user_agent__browser__family \\\n",
+ "4655 IE \n",
"\n",
- " request_header_user_agent__os__family \\\n",
- "257 Other \n",
+ " request_header_user_agent__os__family \\\n",
+ "4655 Windows \n",
"\n",
- " request_header_user_agent__os__version_string request_http_ver \\\n",
- "257 1.1 \n",
+ " request_header_user_agent__os__version_string request_http_ver \\\n",
+ "4655 Vista 1.0 \n",
"\n",
- " request_method request_url response_bytes_clf status \\\n",
- "257 POST /administrator/index.php 4498 200 \n",
+ " request_method request_url response_bytes_clf status \\\n",
+ "4655 POST /administrator/index.php 4481 200 \n",
"\n",
- " time_received \n",
- "257 [04/Apr/2018:23:06:49 +0200] "
+ " time_received \n",
+ "4655 [11/Aug/2019:19:54:28 +0200] "
]
},
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -172,7 +177,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -191,7 +196,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -203,7 +208,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -215,7 +220,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -247,7 +252,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -274,7 +279,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -292,7 +297,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -323,15 +328,15 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/subword_tokenizer.py:187: UserWarning: When truncation is not True, the behaviour currently differs from HuggingFace as cudf always returns overflowing tokens\n",
- " warn(warning_msg)\n"
+ "/opt/conda/envs/morpheus/lib/python3.8/site-packages/cudf/core/subword_tokenizer.py:189: UserWarning: When truncation is not True, the behavior currently differs from HuggingFace as cudf always returns overflowing tokens\n",
+ " warnings.warn(warning_msg)\n"
]
}
],
@@ -349,7 +354,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -365,7 +370,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -376,7 +381,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -395,7 +400,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -414,7 +419,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
@@ -424,7 +429,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -437,7 +442,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@@ -456,17 +461,17 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
+ "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']\n",
"- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
+ "Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
@@ -491,7 +496,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -515,37 +520,37 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Epoch: 50%|█████ | 1/2 [00:35<00:35, 35.41s/it]"
+ "Epoch: 50%|█████ | 1/2 [00:38<00:38, 38.73s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Train loss: 0.18636336472931586\n"
+ "Train loss: 0.2076284834630277\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Epoch: 100%|██████████| 2/2 [01:10<00:00, 35.27s/it]"
+ "Epoch: 100%|██████████| 2/2 [01:17<00:00, 38.85s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Train loss: 0.0059268270875965185\n",
- "CPU times: user 44.8 s, sys: 25.7 s, total: 1min 10s\n",
- "Wall time: 1min 10s\n"
+ "Train loss: 0.008250679652531925\n",
+ "CPU times: user 1min 16s, sys: 896 ms, total: 1min 17s\n",
+ "Wall time: 1min 17s\n"
]
},
{
@@ -596,7 +601,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@@ -606,33 +611,58 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 23,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/envs/morpheus/lib/python3.8/site-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: [PAD] seems not to be NE tag.\n",
+ " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "f1 score: 0.997863\n",
+ "Accuracy score: 0.999263\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/envs/morpheus/lib/python3.8/site-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
+ "/opt/conda/envs/morpheus/lib/python3.8/site-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+ " _warn_prf(average, modifier, msg_start, len(result))\n"
+ ]
+ },
{
"name": "stdout",
"output_type": "stream",
"text": [
- "f1 score: 0.998655\n",
- "Accuracy score: 0.999771\n",
" precision recall f1-score support\n",
"\n",
- " error_level 1.000 1.000 1.000 100\n",
- " error_message 1.000 1.000 1.000 100\n",
- " remote_host 1.000 1.000 1.000 913\n",
- " request_header_referer 1.000 1.000 1.000 508\n",
- " request_header_user_agent 1.000 1.000 1.000 1002\n",
- "request_header_user_agent__os__version_string 0.875 1.000 0.933 14\n",
- " request_http_ver 1.000 1.000 1.000 913\n",
- " request_method 1.000 1.000 1.000 913\n",
- " request_url 0.997 0.981 0.989 913\n",
- " response_bytes_clf 1.000 1.000 1.000 911\n",
- " status 1.000 1.000 1.000 912\n",
- " time_received 1.000 1.000 1.000 985\n",
+ " PAD] 0.000 0.000 0.000 0\n",
+ " error_level 1.000 1.000 1.000 90\n",
+ " error_message 1.000 1.000 1.000 90\n",
+ " remote_host 1.000 1.000 1.000 890\n",
+ " request_header_referer 1.000 0.996 0.998 476\n",
+ " request_header_user_agent 1.000 1.000 1.000 1005\n",
+ "request_header_user_agent__os__version_string 0.000 0.000 0.000 19\n",
+ " request_http_ver 1.000 1.000 1.000 890\n",
+ " request_method 1.000 1.000 1.000 890\n",
+ " request_url 1.000 0.990 0.995 890\n",
+ " response_bytes_clf 1.000 1.000 1.000 888\n",
+ " status 1.000 1.000 1.000 888\n",
+ " time_received 0.998 1.000 0.999 952\n",
"\n",
- " micro avg 0.999 0.998 0.999 8184\n",
- " macro avg 0.989 0.998 0.994 8184\n",
- " weighted avg 0.999 0.998 0.999 8184\n",
+ " micro avg 0.999 0.996 0.998 7968\n",
+ " macro avg 0.846 0.845 0.846 7968\n",
+ " weighted avg 0.997 0.996 0.997 7968\n",
"\n"
]
}
@@ -700,7 +730,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
@@ -714,6 +744,65 @@
" model.save_pretrained('log_parsing_apache_morpheus')"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export model to ONNX"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "tokenizer_output = tokenizer(logs_df[\"raw_preprocess\"][0:3],\n",
+ " max_length=MAX_SEQ_LEN,\n",
+ " stride = STRIDE,\n",
+ " truncation=False,\n",
+ " max_num_rows = 3,\n",
+ " add_special_tokens=False,\n",
+ " return_tensors='pt'\n",
+ " )\n",
+ "sample_input_ids = tokenizer_output['input_ids'].type(torch.long)\n",
+ "sample_attention_masks = tokenizer_output['attention_mask'].type(torch.long)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "sample_model_input = (sample_input_ids, sample_attention_masks)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "torch.onnx.export(model, \n",
+ " sample_model_input, \n",
+ " \"model.onnx\", # where to save the model\n",
+ " export_params=True, # store the trained parameter weights inside the model file\n",
+ " opset_version=10, # the ONNX version to export the model to\n",
+ " do_constant_folding=True, # whether to execute constant folding for optimization\n",
+ " input_names = ['input_ids','attention_mask'], # the model's input names\n",
+ " output_names = ['output'], # the model's output names\n",
+ " dynamic_axes={'input_ids' : {0 : 'batch_size'}, # variable length axes\n",
+ " 'attention_mask': {0: 'batch_size'}, \n",
+ " 'output' : {0 : 'batch_size'}})"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -733,18 +822,11 @@
"\n",
"https://medium.com/rapids-ai/cybert-28b35a4c81c4"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -758,7 +840,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.10"
+ "version": "3.8.15"
}
},
"nbformat": 4,
diff --git a/models/training-tuning-scripts/phishing-models/phish-bert-training.ipynb b/models/training-tuning-scripts/phishing-models/phish-bert-training.ipynb
index 743ad313fc..3cd8eed7d7 100644
--- a/models/training-tuning-scripts/phishing-models/phish-bert-training.ipynb
+++ b/models/training-tuning-scripts/phishing-models/phish-bert-training.ipynb
@@ -43,19 +43,13 @@
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/opt/conda/envs/morpheus/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- }
- ],
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
"source": [
"import cudf\n",
+ "from cudf.core.subword_tokenizer import SubwordTokenizer\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import f1_score\n",
"import binary_sequence_classifier\n",
@@ -91,7 +85,9 @@
{
"cell_type": "code",
"execution_count": 2,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"if not os.path.isfile(\"smsspamcollection.zip\"): \n",
@@ -103,7 +99,9 @@
{
"cell_type": "code",
"execution_count": 3,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [
{
"name": "stdout",
@@ -127,7 +125,9 @@
{
"cell_type": "code",
"execution_count": 4,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"if not os.path.isfile(\"SMSSpamCollection\"):\n",
@@ -202,10 +202,10 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']\n",
+ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']\n",
"- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
@@ -241,28 +241,28 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Epoch: 50%|█████ | 1/2 [00:35<00:35, 35.78s/it]"
+ "Epoch: 50%|█████ | 1/2 [00:34<00:34, 34.08s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Train loss: 0.09204745624946165\n"
+ "Train loss: 0.07317519100782062\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Epoch: 100%|██████████| 2/2 [01:11<00:00, 35.92s/it]"
+ "Epoch: 100%|██████████| 2/2 [01:09<00:00, 34.79s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Train loss: 0.01900260798949083\n"
+ "Train loss: 0.015208904994817982\n"
]
},
{
@@ -283,8 +283,8 @@
"metadata": {},
"outputs": [],
"source": [
- "# save model file and configuration file in a directory\n",
- "seq_classifier.save_model(\"./phish-bert-model\")"
+ "# save as pytorch model\n",
+ "torch.save(seq_classifier._model.module, \"phishing-bert.pt\")"
]
},
{
@@ -345,7 +345,7 @@
{
"data": {
"text/plain": [
- "0.9729729729729729"
+ "0.9731543624161074"
]
},
"execution_count": 15,
@@ -359,6 +359,59 @@
"f1_score(true_labels, tests)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Export model to ONNX"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenizer = SubwordTokenizer(\"./resources/bert-base-uncased-hash.txt\", do_lower_case=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenizer_output = tokenizer(df[\"message\"][0:3],\n",
+ " max_length=128,\n",
+ " max_num_rows=3,\n",
+ " truncation=True,\n",
+ " add_special_tokens=True,\n",
+ " return_tensors=\"pt\")\n",
+ "\n",
+ "sample_model_input = (tokenizer_output[\"input_ids\"].type(torch.long), tokenizer_output[\"attention_mask\"].type(torch.long))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "torch.onnx.export(seq_classifier._model.module, \n",
+ " sample_model_input, \n",
+ " \"model.onnx\", # where to save the model\n",
+ " export_params=True, # store the trained parameter weights inside the model file\n",
+ " opset_version=10, # the ONNX version to export the model to\n",
+ " do_constant_folding=True, # whether to execute constant folding for optimization\n",
+ " input_names = ['input_ids','attention_mask'], # the model's input names\n",
+ " output_names = ['output'], # the model's output names\n",
+ " dynamic_axes={'input_ids' : {0 : 'batch_size'}, # variable length axes\n",
+ " 'attention_mask': {0: 'batch_size'}, \n",
+ " 'output' : {0 : 'batch_size'}})"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -418,7 +471,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.9 (default, Jan 26 2021, 15:33:00) \n[GCC 8.4.0]"
+ "version": "3.8.15"
},
"vscode": {
"interpreter": {
diff --git a/models/training-tuning-scripts/root-cause-models/root-cause-bert.ipynb b/models/training-tuning-scripts/root-cause-models/root-cause-bert.ipynb
index 9d729610ac..37f09e6754 100644
--- a/models/training-tuning-scripts/root-cause-models/root-cause-bert.ipynb
+++ b/models/training-tuning-scripts/root-cause-models/root-cause-bert.ipynb
@@ -64,8 +64,9 @@
},
"outputs": [],
"source": [
- "import cudf;\n",
- "from binary_sequence_classifier import BinarySequenceClassifier;\n",
+ "import cudf\n",
+ "from cudf.core.subword_tokenizer import SubwordTokenizer\n",
+ "from binary_sequence_classifier import BinarySequenceClassifier\n",
"from os import path;\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import f1_score\n",
@@ -88,7 +89,9 @@
{
"cell_type": "code",
"execution_count": 2,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"random_seed=42"
@@ -100,7 +103,8 @@
"metadata": {
"colab": {},
"colab_type": "code",
- "id": "_UkeC7SG2krJ"
+ "id": "_UkeC7SG2krJ",
+ "tags": []
},
"outputs": [],
"source": [
@@ -187,7 +191,9 @@
{
"cell_type": "code",
"execution_count": 11,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#X_train.to_csv(\"Rootcause-training-data.csv\",index=False)"
@@ -217,7 +223,7 @@
"metadata": {},
"outputs": [],
"source": [
- "X_test.to_json(\"Rootcause-validation-data.jsonlines\",orient='records',lines=True)"
+ "X_test.to_json(\"Rootcause-validation-data.jsonlines\", orient='records',lines=True)"
]
},
{
@@ -264,10 +270,10 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']\n",
+ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']\n",
"- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
@@ -307,14 +313,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Epoch: 100%|██████████| 1/1 [00:08<00:00, 8.64s/it]"
+ "Epoch: 100%|██████████| 1/1 [00:09<00:00, 9.12s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Train loss: 0.6523606370795857\n"
+ "Train loss: 0.5870807089588859\n"
]
},
{
@@ -370,7 +376,7 @@
{
"data": {
"text/plain": [
- "0.9601666666666666"
+ "0.95"
]
},
"execution_count": 21,
@@ -423,7 +429,7 @@
{
"data": {
"text/plain": [
- "0.9765765765765766"
+ "0.9591474245115452"
]
},
"execution_count": 24,
@@ -457,8 +463,8 @@
{
"data": {
"text/plain": [
- "array([[189, 13],\n",
- " [ 0, 271]])"
+ "array([[180, 22],\n",
+ " [ 1, 270]])"
]
},
"execution_count": 25,
@@ -487,28 +493,29 @@
{
"data": {
"text/plain": [
- "(array([ 0, 1, 7, 9, 11, 12, 13, 14, 16, 18, 19, 20, 22,\n",
- " 23, 24, 25, 29, 31, 34, 42, 44, 45, 46, 47, 49, 50,\n",
- " 51, 52, 53, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65,\n",
- " 66, 67, 68, 69, 70, 71, 73, 74, 75, 80, 81, 82, 85,\n",
- " 86, 88, 89, 90, 92, 93, 94, 95, 97, 98, 99, 101, 103,\n",
- " 104, 105, 106, 107, 109, 110, 112, 113, 114, 115, 117, 119, 120,\n",
- " 123, 124, 125, 126, 128, 129, 130, 131, 134, 136, 137, 138, 139,\n",
- " 140, 141, 142, 143, 144, 145, 146, 148, 149, 150, 153, 154, 156,\n",
- " 159, 160, 161, 162, 163, 164, 165, 166, 170, 172, 173, 174, 175,\n",
- " 176, 178, 181, 182, 183, 187, 188, 191, 194, 196, 200, 201, 202,\n",
- " 205, 206, 207, 209, 210, 214, 215, 216, 217, 218, 221, 222, 223,\n",
- " 224, 226, 227, 228, 229, 231, 234, 235, 236, 237, 240, 244, 245,\n",
- " 246, 250, 251, 254, 255, 256, 257, 262, 265, 266, 267, 268, 269,\n",
- " 271, 274, 275, 276, 278, 284, 285, 287, 289, 290, 291, 292, 293,\n",
- " 294, 295, 296, 299, 300, 301, 302, 303, 304, 309, 311, 312, 315,\n",
- " 316, 317, 318, 319, 321, 322, 323, 328, 329, 330, 331, 332, 334,\n",
- " 335, 340, 343, 347, 349, 350, 351, 352, 353, 354, 355, 356, 357,\n",
- " 358, 361, 363, 365, 367, 369, 373, 374, 375, 378, 379, 380, 381,\n",
- " 382, 383, 384, 385, 386, 388, 389, 390, 392, 394, 395, 396, 397,\n",
- " 398, 399, 400, 401, 402, 404, 406, 407, 408, 409, 410, 411, 412,\n",
- " 414, 415, 416, 418, 419, 420, 421, 425, 426, 429, 432, 434, 435,\n",
- " 436, 437, 438, 441, 442, 443, 444, 450, 453, 460, 471]),)"
+ "(array([ 0, 1, 2, 7, 9, 11, 12, 13, 14, 16, 18, 19, 20,\n",
+ " 22, 23, 24, 25, 29, 31, 34, 42, 43, 44, 45, 46, 47,\n",
+ " 49, 50, 51, 52, 53, 55, 56, 57, 58, 59, 61, 62, 63,\n",
+ " 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 80, 81, 82,\n",
+ " 85, 86, 88, 89, 90, 92, 93, 94, 95, 97, 98, 99, 101,\n",
+ " 103, 104, 105, 106, 107, 109, 110, 112, 113, 114, 115, 117, 119,\n",
+ " 120, 123, 124, 125, 126, 127, 128, 129, 130, 131, 134, 136, 137,\n",
+ " 138, 139, 140, 141, 142, 143, 144, 145, 146, 148, 149, 150, 153,\n",
+ " 154, 155, 156, 159, 160, 161, 162, 163, 164, 165, 166, 170, 171,\n",
+ " 172, 173, 174, 175, 176, 178, 181, 182, 183, 187, 188, 191, 194,\n",
+ " 196, 200, 201, 202, 205, 206, 207, 209, 210, 214, 215, 216, 217,\n",
+ " 218, 221, 222, 223, 224, 226, 227, 228, 229, 231, 234, 235, 236,\n",
+ " 237, 238, 240, 244, 245, 246, 250, 251, 254, 255, 256, 257, 262,\n",
+ " 263, 265, 266, 267, 268, 269, 271, 274, 275, 276, 278, 279, 284,\n",
+ " 285, 287, 289, 290, 291, 292, 293, 294, 295, 296, 297, 299, 300,\n",
+ " 301, 302, 303, 304, 309, 311, 312, 315, 316, 317, 318, 319, 321,\n",
+ " 322, 323, 328, 329, 330, 331, 332, 334, 340, 343, 344, 347, 349,\n",
+ " 350, 351, 352, 353, 354, 355, 356, 357, 358, 361, 363, 365, 367,\n",
+ " 369, 372, 373, 374, 375, 378, 379, 380, 381, 382, 383, 384, 385,\n",
+ " 386, 388, 389, 390, 392, 394, 395, 396, 397, 398, 399, 400, 401,\n",
+ " 402, 404, 406, 407, 408, 409, 410, 411, 412, 414, 415, 416, 418,\n",
+ " 419, 420, 421, 425, 426, 429, 432, 434, 435, 436, 437, 438, 441,\n",
+ " 442, 443, 444, 450, 452, 453]),)"
]
},
"execution_count": 27,
@@ -520,6 +527,57 @@
"(np.where(testpredseries == 1))"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export model to ONNX"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenizer = SubwordTokenizer(\"./resources/bert-base-uncased-hash.txt\", do_lower_case=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenizer_output = tokenizer(dflogs[\"log\"][0:3],\n",
+ " max_length=128,\n",
+ " max_num_rows=3,\n",
+ " truncation=True,\n",
+ " add_special_tokens=True,\n",
+ " return_tensors=\"pt\")\n",
+ "# cast to torch.long so the traced ONNX inputs are int64, as in the phishing and log-parsing export cells\n",
+ "sample_model_input = (tokenizer_output[\"input_ids\"].type(torch.long), tokenizer_output[\"attention_mask\"].type(torch.long))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "torch.onnx.export(seq_classifier._model.module, \n",
+ " sample_model_input, \n",
+ " \"model.onnx\", # where to save the model\n",
+ " export_params=True, # store the trained parameter weights inside the model file\n",
+ " opset_version=10, # the ONNX version to export the model to\n",
+ " do_constant_folding=True, # whether to execute constant folding for optimization\n",
+ " input_names = ['input_ids','attention_mask'], # the model's input names\n",
+ " output_names = ['output'], # the model's output names\n",
+ " dynamic_axes={'input_ids' : {0 : 'batch_size'}, # variable length axes\n",
+ " 'attention_mask': {0: 'batch_size'}, \n",
+ " 'output' : {0 : 'batch_size'}})"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -567,7 +625,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.13"
+ "version": "3.8.15"
}
},
"nbformat": 4,
diff --git a/models/training-tuning-scripts/sid-models/sid-minibert-20211021-script.py b/models/training-tuning-scripts/sid-models/sid-minibert-20230424-script.py
similarity index 96%
rename from models/training-tuning-scripts/sid-models/sid-minibert-20211021-script.py
rename to models/training-tuning-scripts/sid-models/sid-minibert-20230424-script.py
index 7ebf5a9c93..e283bd40f0 100644
--- a/models/training-tuning-scripts/sid-models/sid-minibert-20211021-script.py
+++ b/models/training-tuning-scripts/sid-models/sid-minibert-20230424-script.py
@@ -13,19 +13,19 @@
# limitations under the License.
"""
Example Usage:
-python sid-minibert-20210614-script.py \
- --training-data /datasets/training-data/sid-sample-training-data.csv \
+python sid-minibert-20230424-script.py \
+ --training-data ../../datasets/training-data/sid-sample-training-data.csv \
--model-dir google/bert_uncased_L-4_H-256_A-4 \
- --tokenizer-hash-filepath /resources/bert-base-uncased-hash.txt
- --output-file /trained_models/model.pth
+ --tokenizer-hash-filepath /resources/bert-base-uncased-hash.txt \
+ --output-file sid-minibert-model.pt
"""
import argparse
-
-import torch
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import multilabel_confusion_matrix
+
+import torch
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
@@ -62,9 +62,9 @@ def data_preprocessing(training_data):
cased_tokenizer = SubwordTokenizer("resources/bert-base-uncased-hash.txt", do_lower_case=True)
- tokenizer_output = cased_tokenizer(df.text,
+ tokenizer_output = cased_tokenizer(df.data,
max_length=256,
- max_num_rows=len(df.text),
+ max_num_rows=len(df.data),
padding='max_length',
return_tensors='pt',
truncation=True,
diff --git a/models/training-tuning-scripts/sid-models/sid-minibert-20211021.ipynb b/models/training-tuning-scripts/sid-models/sid-minibert-20230424.ipynb
similarity index 82%
rename from models/training-tuning-scripts/sid-models/sid-minibert-20211021.ipynb
rename to models/training-tuning-scripts/sid-models/sid-minibert-20230424.ipynb
index 5bf632bdf0..a5564de42b 100644
--- a/models/training-tuning-scripts/sid-models/sid-minibert-20211021.ipynb
+++ b/models/training-tuning-scripts/sid-models/sid-minibert-20230424.ipynb
@@ -63,7 +63,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df = cudf.read_csv(\"../datasets/training-data/sid-sample-training-data.csv\")"
+ "df = cudf.read_csv(\"../../datasets/training-data/sid-sample-training-data.csv\")"
]
},
{
@@ -224,7 +224,7 @@
"# load the following model for mini-bert from huggingface\n",
"# model = AutoModelForSequenceClassification.from_pretrained(\"google/bert_uncased_L-4_H-256_A-4\", num_labels=num_labels)\n",
"\n",
- "model = torch.load('repo_model/sid-minibert-20211021.pth')"
+ "model = torch.load('../../sid-models/sid-minibert-20230424.pt')"
]
},
{
@@ -266,7 +266,16 @@
"execution_count": 13,
"id": "educational-channel",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/envs/morpheus/lib/python3.8/site-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
"source": [
"# using hyperparameters recommended in orginial BERT paper\n",
"# the optimizer allows us to apply different hyperpameters for specific parameter groups\n",
@@ -293,14 +302,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Epoch: 100%|██████████| 1/1 [00:24<00:00, 24.37s/it]"
+ "Epoch: 100%|██████████| 1/1 [00:02<00:00, 2.61s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Train loss: 0.0006268636239110492\n"
+ "Train loss: 0.000367460027046036\n"
]
},
{
@@ -373,8 +382,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "F1 Macro Validation Accuracy: 99.87012987012986\n",
- "Flat Validation Accuracy: 99.75\n"
+ "F1 Macro Validation Accuracy: 100.0\n",
+ "Flat Validation Accuracy: 100.0\n"
]
}
],
@@ -427,32 +436,32 @@
"[[370 0]\n",
" [ 0 30]]\n",
"si_bank_acct\n",
- "[[354 0]\n",
- " [ 0 46]]\n",
+ "[[358 0]\n",
+ " [ 0 42]]\n",
"si_credit_card\n",
- "[[357 0]\n",
- " [ 0 43]]\n",
+ "[[363 0]\n",
+ " [ 0 37]]\n",
"si_email\n",
- "[[362 0]\n",
- " [ 0 38]]\n",
+ "[[369 0]\n",
+ " [ 0 31]]\n",
"si_govt_id\n",
- "[[361 0]\n",
- " [ 0 39]]\n",
- "si_name\n",
- "[[361 1]\n",
- " [ 0 38]]\n",
- "si_password\n",
"[[357 0]\n",
" [ 0 43]]\n",
+ "si_name\n",
+ "[[369 0]\n",
+ " [ 0 31]]\n",
+ "si_password\n",
+ "[[358 0]\n",
+ " [ 0 42]]\n",
"si_phone_num\n",
- "[[355 0]\n",
- " [ 0 45]]\n",
+ "[[368 0]\n",
+ " [ 0 32]]\n",
"si_secret_keys\n",
- "[[365 0]\n",
- " [ 0 35]]\n",
+ "[[361 0]\n",
+ " [ 0 39]]\n",
"si_user\n",
- "[[365 0]\n",
- " [ 0 35]]\n"
+ "[[361 0]\n",
+ " [ 0 39]]\n"
]
}
],
@@ -485,6 +494,55 @@
"# torch.save(model, output_file) "
]
},
+ {
+ "cell_type": "markdown",
+ "id": "96d4dc3b-54a4-4ca8-8a40-bb7dca765180",
+ "metadata": {},
+ "source": [
+ "## Export Model to ONNX"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "96f92112-cf40-4b1c-b796-b196ab9f3928",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "tokenizer_output = bert_uncased_tokenizer(df[\"data\"][0:3],\n",
+ " max_length=256,\n",
+ " max_num_rows=3,\n",
+ " truncation=True,\n",
+ " add_special_tokens=True,\n",
+ " return_tensors=\"pt\")\n",
+ "\n",
+ "sample_model_input = (tokenizer_output[\"input_ids\"], tokenizer_output[\"attention_mask\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "beef0eab-e852-4fb1-8020-7fc02d475c1e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "torch.onnx.export(model, \n",
+ " sample_model_input, \n",
+ " \"model.onnx\", # where to save the model\n",
+ " export_params=True, # store the trained parameter weights inside the model file\n",
+ " opset_version=10, # the ONNX opset version to target for the exported model\n",
+ " do_constant_folding=True, # whether to execute constant folding for optimization\n",
+ " input_names = ['input_ids','attention_mask'], # the model's input names\n",
+ " output_names = ['output'], # the model's output names\n",
+ " dynamic_axes={'input_ids' : {0 : 'batch_size'}, # variable length axes\n",
+ " 'attention_mask': {0: 'batch_size'}, \n",
+ " 'output' : {0 : 'batch_size'}})"
+ ]
+ },
{
"cell_type": "markdown",
"id": "ideal-community",
@@ -498,7 +556,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -512,7 +570,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.10"
+ "version": "3.8.15"
}
},
"nbformat": 4,
diff --git a/models/triton-model-repo/phishing-bert-onnx/1/model.onnx b/models/triton-model-repo/phishing-bert-onnx/1/model.onnx
index a3aacf0265..164a725392 120000
--- a/models/triton-model-repo/phishing-bert-onnx/1/model.onnx
+++ b/models/triton-model-repo/phishing-bert-onnx/1/model.onnx
@@ -1 +1 @@
-../../../phishing-models/phishing-bert-20221115.onnx
\ No newline at end of file
+../../../phishing-models/phishing-bert-20230421.onnx
\ No newline at end of file
diff --git a/models/triton-model-repo/sid-minibert-onnx/1/model.onnx b/models/triton-model-repo/sid-minibert-onnx/1/model.onnx
index 7340667742..b46b5afbc1 120000
--- a/models/triton-model-repo/sid-minibert-onnx/1/model.onnx
+++ b/models/triton-model-repo/sid-minibert-onnx/1/model.onnx
@@ -1 +1 @@
-../../../sid-models/sid-minibert-20211021.onnx
\ No newline at end of file
+../../../sid-models/sid-minibert-20230424.onnx
\ No newline at end of file