diff --git a/kazu/steps/linking/post_processing/disambiguation/context_scoring.py b/kazu/steps/linking/post_processing/disambiguation/context_scoring.py index dc7be152..5a90fd6a 100644 --- a/kazu/steps/linking/post_processing/disambiguation/context_scoring.py +++ b/kazu/steps/linking/post_processing/disambiguation/context_scoring.py @@ -164,6 +164,7 @@ def __init__(self, contexts_path: str, model_path: str): self._calculate_id_vectors( directory=contexts_path_as_path.parent, filename=contexts_path_as_path.name ) + self.null_vector = self.vectorizer.transform([""]) @kazu_disk_cache.memoize(ignore={0, 1}) def _calculate_id_vectors(self, directory: Path, filename: str) -> None: @@ -216,9 +217,7 @@ def __call__( if maybe_id_vec is not None: idx_to_vec[idx] = maybe_id_vec else: - idx_to_vec[idx] = csr_matrix( - (0, self.vectorizer.max_features), dtype=np.float64 - ) + idx_to_vec[idx] = self.null_vector if idx_to_vec: idx_lst = list(idx_to_vec.keys()) scores = -(