From a4e26f1c730bdad74ae9284faead99adf67a066b Mon Sep 17 00:00:00 2001
From: Jyun-Yu Jiang
Date: Fri, 31 Jan 2025 03:50:27 +0000
Subject: [PATCH] Remove a redundant XMR test; Add weights_only argument for
 torch.load; Bump CodeQL version to v3

---
 .github/workflows/codeql.yml         |  8 ++--
 .github/workflows/pytest_aarch64.yml |  2 +-
 pecos/core/utils/file_util.hpp       |  1 +
 pecos/xmc/xtransformer/matcher.py    |  6 +--
 pecos/xmc/xtransformer/module.py     |  2 +-
 test/pecos/xmr/test_reranker.py      | 55 ----------------------------
 6 files changed, 10 insertions(+), 64 deletions(-)

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 8769cf1e..7bdf1abc 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -23,18 +23,18 @@ jobs:
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v2
+      uses: actions/checkout@v3
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
+      uses: github/codeql-action/init@v3
       with:
         languages: ${{ matrix.language }}
 
     # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v2
+      uses: github/codeql-action/autobuild@v3
 
     # ℹī¸ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -48,4 +48,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
+      uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/pytest_aarch64.yml b/.github/workflows/pytest_aarch64.yml
index 812b4852..58e11b0c 100644
--- a/.github/workflows/pytest_aarch64.yml
+++ b/.github/workflows/pytest_aarch64.yml
@@ -6,7 +6,7 @@ jobs:
   Ubuntu-Python-Unit-Test:
     name: Ubuntu 22.04 Python3.10 Unit Tests
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04-arm
 
     steps:
       - uses: actions/checkout@v1
diff --git a/pecos/core/utils/file_util.hpp b/pecos/core/utils/file_util.hpp
index 2f87c0e8..02230a37 100644
--- a/pecos/core/utils/file_util.hpp
+++ b/pecos/core/utils/file_util.hpp
@@ -15,6 +15,7 @@
 #define __FILE_UTIL_H__
 
 #include
+#include
 #include
 #include
 #include
diff --git a/pecos/xmc/xtransformer/matcher.py b/pecos/xmc/xtransformer/matcher.py
index 9ed2a820..23309e9b 100644
--- a/pecos/xmc/xtransformer/matcher.py
+++ b/pecos/xmc/xtransformer/matcher.py
@@ -408,7 +408,7 @@ def load(cls, load_dir):
         # load text_model
         text_model_dir = os.path.join(load_dir, "text_model")
         if os.path.exists(text_model_dir):
-            text_model = torch.load(text_model_dir)
+            text_model = torch.load(text_model_dir, weights_only=False)
         else:
             text_model = None
@@ -1330,7 +1330,7 @@ def train(
         saved_trn_pt = kwargs.get("saved_trn_pt", "")
         if not prob.is_tokenized:
             if saved_trn_pt and os.path.isfile(saved_trn_pt):
-                trn_tensors = torch.load(saved_trn_pt)
+                trn_tensors = torch.load(saved_trn_pt, weights_only=False)
                 LOGGER.info("trn tensors loaded_from {}".format(saved_trn_pt))
             else:
                 trn_tensors = matcher.text_to_tensor(
@@ -1345,7 +1345,7 @@ def train(
         if val_prob is not None and not val_prob.is_tokenized:
             saved_val_pt = kwargs.get("saved_val_pt", "")
             if saved_val_pt and os.path.isfile(saved_val_pt):
-                val_tensors = torch.load(saved_val_pt)
+                val_tensors = torch.load(saved_val_pt, weights_only=False)
                 LOGGER.info("val tensors loaded from {}".format(saved_val_pt))
             else:
                 val_tensors = matcher.text_to_tensor(
diff --git a/pecos/xmc/xtransformer/module.py b/pecos/xmc/xtransformer/module.py
index dd396190..3afe3a98 100644
--- a/pecos/xmc/xtransformer/module.py
+++ b/pecos/xmc/xtransformer/module.py
@@ -454,7 +454,7 @@ def load(cls, load_dir, shard=0):
         nr_shards = cls.get_data_stats(load_dir)["num_shards"]
         if shard >= nr_shards:
             raise ValueError(f"Loading shard#{shard} where there are only {nr_shards} available")
-        return torch.load(f"{load_dir}/{shard}")
+        return torch.load(f"{load_dir}/{shard}", weights_only=False)
 
     @property
     def has_ns(self):
diff --git a/test/pecos/xmr/test_reranker.py b/test/pecos/xmr/test_reranker.py
index dfb226c6..efeb39d1 100644
--- a/test/pecos/xmr/test_reranker.py
+++ b/test/pecos/xmr/test_reranker.py
@@ -50,58 +50,3 @@ def test_numr_encoder():
         out_feat.numpy(),
         abs=0.0,
     ), f"Enc(inp_feat) != inp_feat, given Enc is identity"
-
-
-def test_textnumr_encoder():
-    import torch
-    from transformers import set_seed
-    from transformers import AutoConfig, AutoTokenizer
-    from pecos.xmr.reranker.model import TextNumrEncoderConfig
-    from pecos.xmr.reranker.model import TextNumrEncoder
-
-    enc_list = [
-        "prajjwal1/bert-tiny",
-        "sentence-transformers/all-MiniLM-L6-v2",
-        "intfloat/multilingual-e5-small",
-    ]
-    ans_list = [
-        0.007879042997956276,
-        0.0035168465692549944,
-        -0.0047034271992743015,
-    ]
-    set_seed(1234)
-
-    for idx, enc_name in enumerate(enc_list):
-        text_config = AutoConfig.from_pretrained(
-            enc_name,
-            hidden_dropout_prob=0.0,
-        )
-        textnumr_config = TextNumrEncoderConfig(
-            text_config=text_config,
-            numr_config=None,
-            text_pooling_type="cls",
-            head_actv_type="identity",
-            head_dropout_prob=0.0,
-            head_size_list=[1],
-        )
-        textnumr_encoder = TextNumrEncoder(textnumr_config)
-        linear_layer = textnumr_encoder.head_layers.mlp_layers[0]
-        linear_layer.bias.data.fill_(0.0)
-        linear_layer.weight.data.fill_(0.0)
-        linear_layer.weight.data.fill_diagonal_(1.0)
-        textnumr_encoder.scorer.bias.data.fill_(0.0)
-        textnumr_encoder.scorer.weight.data.fill_(1.0)
-
-        # obtained from bert-tiny tokenizer("I Like coffee")
-        tokenizer = AutoTokenizer.from_pretrained(enc_name)
-        input_dict = tokenizer("I Like coffee", return_tensors="pt")
-        outputs = textnumr_encoder(**input_dict)
-        assert outputs.text_emb is not None
-        assert outputs.numr_emb is None
-
-        text_emb = outputs.text_emb
-        mu = torch.mean(text_emb).item()
-        assert mu == approx(
-            ans_list[idx],
-            abs=1e-3,
-        ), f"mu(text_emb)={mu} != {ans_list[idx]}"
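
Note on the weights_only changes above: PyTorch 2.6 flipped the default of
torch.load from weights_only=False to weights_only=True, which restricts
unpickling to tensors, state_dicts, and a small allowlist of safe types.
The files these call sites read back (the pickled text_model object, the
cached trn/val tensor tuples, and the tokenized data shards) are full
Python objects written by torch.save in this repo, so loading them under
the new default raises pickle.UnpicklingError; passing weights_only=False
explicitly preserves the old behavior for these trusted, repo-produced
files. A minimal sketch of the failure mode, assuming PyTorch >= 2.6
(TinyModule and model.pt are illustrative names, not from this codebase):

    import torch

    class TinyModule(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(4, 1)

    # torch.save on a module pickles the whole Python object, not just tensors.
    torch.save(TinyModule(), "model.pt")

    # Under the PyTorch >= 2.6 default (weights_only=True), unpickling an
    # arbitrary class is rejected with an UnpicklingError:
    try:
        torch.load("model.pt")
    except Exception as err:
        print(f"default load failed: {err}")

    # Opting out restores the pre-2.6 behavior; do this only for trusted files.
    model = torch.load("model.pt", weights_only=False)
    print(type(model).__name__)  # TinyModule

Checkpoints that are plain state_dicts load fine under the stricter
default, so new code can keep weights_only=True by saving
model.state_dict() instead of the module object.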