From a4e26f1c730bdad74ae9284faead99adf67a066b Mon Sep 17 00:00:00 2001
From: Jyun-Yu Jiang
Date: Fri, 31 Jan 2025 03:50:27 +0000
Subject: [PATCH] Remove a redundant XMR test; Add weights_only argument for
 torch.load; Bump CodeQL version to v3

---
 .github/workflows/codeql.yml         |  8 ++--
 .github/workflows/pytest_aarch64.yml |  2 +-
 pecos/core/utils/file_util.hpp       |  1 +
 pecos/xmc/xtransformer/matcher.py    |  6 +--
 pecos/xmc/xtransformer/module.py     |  2 +-
 test/pecos/xmr/test_reranker.py      | 55 ----------------------------
 6 files changed, 10 insertions(+), 64 deletions(-)

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 8769cf1e..7bdf1abc 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -23,18 +23,18 @@ jobs:
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v2
+      uses: actions/checkout@v3
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
+      uses: github/codeql-action/init@v3
       with:
         languages: ${{ matrix.language }}
 
     # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v2
+      uses: github/codeql-action/autobuild@v3
 
     # ℹī¸ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -48,4 +48,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
+      uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/pytest_aarch64.yml b/.github/workflows/pytest_aarch64.yml
index 812b4852..58e11b0c 100644
--- a/.github/workflows/pytest_aarch64.yml
+++ b/.github/workflows/pytest_aarch64.yml
@@ -6,7 +6,7 @@ jobs:
   Ubuntu-Python-Unit-Test:
     name: Ubuntu 22.04 Python3.10 Unit Tests
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04-arm
 
     steps:
       - uses: actions/checkout@v1
diff --git a/pecos/core/utils/file_util.hpp b/pecos/core/utils/file_util.hpp
index 2f87c0e8..02230a37 100644
--- a/pecos/core/utils/file_util.hpp
+++ b/pecos/core/utils/file_util.hpp
@@ -15,6 +15,7 @@
 #define __FILE_UTIL_H__
 
 #include
+#include
 #include
 #include
 #include
diff --git a/pecos/xmc/xtransformer/matcher.py b/pecos/xmc/xtransformer/matcher.py
index 9ed2a820..23309e9b 100644
--- a/pecos/xmc/xtransformer/matcher.py
+++ b/pecos/xmc/xtransformer/matcher.py
@@ -408,7 +408,7 @@ def load(cls, load_dir):
         # load text_model
         text_model_dir = os.path.join(load_dir, "text_model")
         if os.path.exists(text_model_dir):
-            text_model = torch.load(text_model_dir)
+            text_model = torch.load(text_model_dir, weights_only=False)
         else:
             text_model = None
@@ -1330,7 +1330,7 @@ def train(
         saved_trn_pt = kwargs.get("saved_trn_pt", "")
         if not prob.is_tokenized:
             if saved_trn_pt and os.path.isfile(saved_trn_pt):
-                trn_tensors = torch.load(saved_trn_pt)
+                trn_tensors = torch.load(saved_trn_pt, weights_only=False)
                 LOGGER.info("trn tensors loaded_from {}".format(saved_trn_pt))
             else:
                 trn_tensors = matcher.text_to_tensor(
@@ -1345,7 +1345,7 @@ def train(
         if val_prob is not None and not val_prob.is_tokenized:
             saved_val_pt = kwargs.get("saved_val_pt", "")
             if saved_val_pt and os.path.isfile(saved_val_pt):
-                val_tensors = torch.load(saved_val_pt)
+                val_tensors = torch.load(saved_val_pt, weights_only=False)
                 LOGGER.info("val tensors loaded from {}".format(saved_val_pt))
             else:
                 val_tensors = matcher.text_to_tensor(
diff --git a/pecos/xmc/xtransformer/module.py b/pecos/xmc/xtransformer/module.py
index dd396190..3afe3a98 100644
--- a/pecos/xmc/xtransformer/module.py
+++ b/pecos/xmc/xtransformer/module.py
@@ -454,7 +454,7 @@ def load(cls, load_dir, shard=0):
         nr_shards = cls.get_data_stats(load_dir)["num_shards"]
         if shard >= nr_shards:
             raise ValueError(f"Loading shard#{shard} where there are only {nr_shards} available")
-        return torch.load(f"{load_dir}/{shard}")
+        return torch.load(f"{load_dir}/{shard}", weights_only=False)
 
     @property
     def has_ns(self):
diff --git a/test/pecos/xmr/test_reranker.py b/test/pecos/xmr/test_reranker.py
index dfb226c6..efeb39d1 100644
--- a/test/pecos/xmr/test_reranker.py
+++ b/test/pecos/xmr/test_reranker.py
@@ -50,58 +50,3 @@ def test_numr_encoder():
         out_feat.numpy(),
         abs=0.0,
     ), f"Enc(inp_feat) != inp_feat, given Enc is identity"
-
-
-def test_textnumr_encoder():
-    import torch
-    from transformers import set_seed
-    from transformers import AutoConfig, AutoTokenizer
-    from pecos.xmr.reranker.model import TextNumrEncoderConfig
-    from pecos.xmr.reranker.model import TextNumrEncoder
-
-    enc_list = [
-        "prajjwal1/bert-tiny",
-        "sentence-transformers/all-MiniLM-L6-v2",
-        "intfloat/multilingual-e5-small",
-    ]
-    ans_list = [
-        0.007879042997956276,
-        0.0035168465692549944,
-        -0.0047034271992743015,
-    ]
-    set_seed(1234)
-
-    for idx, enc_name in enumerate(enc_list):
-        text_config = AutoConfig.from_pretrained(
-            enc_name,
-            hidden_dropout_prob=0.0,
-        )
-        textnumr_config = TextNumrEncoderConfig(
-            text_config=text_config,
-            numr_config=None,
-            text_pooling_type="cls",
-            head_actv_type="identity",
-            head_dropout_prob=0.0,
-            head_size_list=[1],
-        )
-        textnumr_encoder = TextNumrEncoder(textnumr_config)
-        linear_layer = textnumr_encoder.head_layers.mlp_layers[0]
-        linear_layer.bias.data.fill_(0.0)
-        linear_layer.weight.data.fill_(0.0)
-        linear_layer.weight.data.fill_diagonal_(1.0)
-        textnumr_encoder.scorer.bias.data.fill_(0.0)
-        textnumr_encoder.scorer.weight.data.fill_(1.0)
-
-        # obtained from bert-tiny tokenizer("I Like coffee")
-        tokenizer = AutoTokenizer.from_pretrained(enc_name)
-        input_dict = tokenizer("I Like coffee", return_tensors="pt")
-        outputs = textnumr_encoder(**input_dict)
-        assert outputs.text_emb is not None
-        assert outputs.numr_emb is None
-
-        text_emb = outputs.text_emb
-        mu = torch.mean(text_emb).item()
-        assert mu == approx(
-            ans_list[idx],
-            abs=1e-3,
-        ), f"mu(text_emb)={mu} != {ans_list[idx]}"
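
Note on the weights_only changes above: PyTorch 2.6 flipped the default of
torch.load from weights_only=False to weights_only=True, which restricts
unpickling to tensors, state_dicts, and a small allowlist of safe types.
The files these call sites read back (the pickled text_model object, the
cached trn/val tensor tuples, and the tokenized data shards) are full
Python objects written by torch.save in this repo, so loading them under
the new default raises pickle.UnpicklingError; passing weights_only=False
explicitly preserves the old behavior for these trusted, repo-produced
files. A minimal sketch of the failure mode, assuming PyTorch >= 2.6
(TinyModule and model.pt are illustrative names, not from this codebase):

    import torch

    class TinyModule(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(4, 1)

    # torch.save on a module pickles the whole Python object, not just tensors.
    torch.save(TinyModule(), "model.pt")

    # Under the PyTorch >= 2.6 default (weights_only=True), unpickling an
    # arbitrary class is rejected with an UnpicklingError:
    try:
        torch.load("model.pt")
    except Exception as err:
        print(f"default load failed: {err}")

    # Opting out restores the pre-2.6 behavior; do this only for trusted files.
    model = torch.load("model.pt", weights_only=False)
    print(type(model).__name__)  # TinyModule

Checkpoints that are plain state_dicts load fine under the stricter
default, so new code can keep weights_only=True by saving
model.state_dict() instead of the module object.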