From 4d8772addd6993654f59f36c2f07db2238054fb3 Mon Sep 17 00:00:00 2001 From: Alessandro Saccoia Date: Tue, 6 Aug 2024 18:57:42 +0200 Subject: [PATCH] Update code to use csr_matrix and np.float64 to address compatibility bug started with numpy 1.14.0 --- milvus_model/hybrid/bge_m3.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/milvus_model/hybrid/bge_m3.py b/milvus_model/hybrid/bge_m3.py index 46af630..88950db 100644 --- a/milvus_model/hybrid/bge_m3.py +++ b/milvus_model/hybrid/bge_m3.py @@ -1,7 +1,8 @@ import logging from typing import Dict, List -from scipy.sparse import csr_array, vstack +from scipy.sparse import csr_matrix, vstack +import numpy as np from milvus_model.base import BaseEmbeddingFunction from milvus_model.utils import import_FlagEmbedding @@ -78,9 +79,9 @@ def _encode(self, texts: List[str]) -> Dict: results["sparse"] = [] for sparse_vec in output["lexical_weights"]: indices = [int(k) for k in sparse_vec] - values = list(sparse_vec.values()) + values = np.array(list(sparse_vec.values()), dtype=np.float64) row_indices = [0] * len(indices) - csr = csr_array((values, (row_indices, indices)), shape=(1, sparse_dim)) + csr = csr_matrix((values, (row_indices, indices)), shape=(1, sparse_dim)) results["sparse"].append(csr) results["sparse"] = vstack(results["sparse"]).tocsr() if self._encode_config["return_colbert_vecs"] is True: