CU-8694vte2g 1.12 depr removal (CogStack#454)

* CU-8694vte2g: Remove CDB.add_concept method
* CU-8694vte2g: Remove unused import (deprecated decorator)
* CU-8694vte2g: Remove CAT.get_spacy_nlp method
* CU-8694vte2g: Remove CAT.train_supervised method
* CU-8694vte2g: Remove CAT multiprocessing methods
* CU-8694vte2g: Remove MetaCAT.train method
* CU-8694vte2g: Remove medcat.utils.ner.helper.deid_text method
* CU-8694vte2g: Remove use of deprecated method
* CU-8694vte2g: Add back removed deprecation import
Phamily · Jun 19, 2024 · 7058ec4 · 7058ec4
1 parent 1c3628d
commit 7058ec4
Show file tree

Hide file tree

Showing 6 changed files with 11 additions and 187 deletions.
diff --git a/medcat/cat.py b/medcat/cat.py
@@ -16,7 +16,6 @@
 from datetime import date
 from tqdm.autonotebook import tqdm, trange
 from spacy.tokens import Span, Doc, Token
-from spacy.language import Language
 import humanfriendly
 
 from medcat import __version__
@@ -37,7 +36,6 @@
 from medcat.utils.meta_cat.data_utils import json_to_fake_spacy
 from medcat.config import Config
 from medcat.vocab import Vocab
-from medcat.utils.decorators import deprecated
 from medcat.ner.transformers_ner import TransformersNER
 from medcat.utils.saving.serializer import SPECIALITY_NAMES, ONE2MANY
 from medcat.utils.saving.envsnapshot import get_environment_info, ENV_SNAPSHOT_FILE_NAME
@@ -147,16 +145,6 @@ def _create_pipeline(self, config: Config):
         # Set max document length
         self.pipe.spacy_nlp.max_length = config.preprocessing.max_document_length
 
-    @deprecated(message="Replaced with cat.pipe.spacy_nlp.",
-                depr_version=(1, 2, 7), removal_version=(1, 12, 0))
-    def get_spacy_nlp(self) -> Language:
-        """Returns the spacy pipeline with MedCAT
-
-        Returns:
-            Language: The spacy Language being used.
-        """
-        return self.pipe.spacy_nlp
-
     def get_hash(self, force_recalc: bool = False) -> str:
         """Will not be a deep hash but will try to catch all the changing parts during training.
 
@@ -772,43 +760,6 @@ def add_and_train_concept(self,
                 for _cui in cuis:
                     self.linker.context_model.train(cui=_cui, entity=spacy_entity, doc=spacy_doc, negative=True)  # type: ignore
 
-    @deprecated(message="Use train_supervised_from_json to train based on data "
-                "loaded from a json file",
-                depr_version=(1, 8, 0), removal_version=(1, 12, 0))
-    def train_supervised(self,
-                         data_path: str,
-                         reset_cui_count: bool = False,
-                         nepochs: int = 1,
-                         print_stats: int = 0,
-                         use_filters: bool = False,
-                         terminate_last: bool = False,
-                         use_overlaps: bool = False,
-                         use_cui_doc_limit: bool = False,
-                         test_size: int = 0,
-                         devalue_others: bool = False,
-                         use_groups: bool = False,
-                         never_terminate: bool = False,
-                         train_from_false_positives: bool = False,
-                         extra_cui_filter: Optional[Set] = None,
-                         retain_extra_cui_filter: bool = False,
-                         checkpoint: Optional[Checkpoint] = None,
-                         retain_filters: bool = False,
-                         is_resumed: bool = False) -> Tuple:
-        """Train supervised by reading data from a json file.
-
-        Refer to `train_supervvised_from_json` and/or `train_supervised_raw`
-        for further details.
-
-        # noqa: DAR101
-        # noqa: DAR201
-        """
-        return self.train_supervised_from_json(data_path, reset_cui_count, nepochs,
-                                               print_stats, use_filters, terminate_last,
-                                               use_overlaps, use_cui_doc_limit, test_size,
-                                               devalue_others, use_groups, never_terminate,
-                                               train_from_false_positives, extra_cui_filter,
-                                               retain_extra_cui_filter, checkpoint,
-                                               retain_filters, is_resumed)
 
     def train_supervised_from_json(self,
                                    data_path: str,
@@ -1274,26 +1225,6 @@ def _save_docs_to_file(self, docs: Iterable, annotated_ids: List[str], save_dir_
             pickle.dump((annotated_ids, part_counter), open(annotated_ids_path, 'wb'))
         return part_counter
 
-    @deprecated(message="Use `multiprocessing_batch_char_size` instead",
-                depr_version=(1, 10, 0), removal_version=(1, 12, 0))
-    def multiprocessing(self,
-                        data: Union[List[Tuple], Iterable[Tuple]],
-                        nproc: int = 2,
-                        batch_size_chars: int = 5000 * 1000,
-                        only_cui: bool = False,
-                        addl_info: List[str] = ['cui2icd10', 'cui2ontologies', 'cui2snomed'],
-                        separate_nn_components: bool = True,
-                        out_split_size_chars: Optional[int] = None,
-                        save_dir_path: str = os.path.abspath(os.getcwd()),
-                        min_free_memory=0.1) -> Dict:
-        return self.multiprocessing_batch_char_size(data=data, nproc=nproc,
-                                                    batch_size_chars=batch_size_chars,
-                                                    only_cui=only_cui, addl_info=addl_info,
-                                                    separate_nn_components=separate_nn_components,
-                                                    out_split_size_chars=out_split_size_chars,
-                                                    save_dir_path=save_dir_path,
-                                                    min_free_memory=min_free_memory)
-
     def multiprocessing_batch_char_size(self,
                                         data: Union[List[Tuple], Iterable[Tuple]],
                                         nproc: int = 2,
@@ -1548,22 +1479,6 @@ def _multiprocessing_batch(self,
 
         return docs
 
-    @deprecated(message="Use `multiprocessing_batch_docs_size` instead",
-                depr_version=(1, 10, 0), removal_version=(1, 12, 0))
-    def multiprocessing_pipe(self, in_data: Union[List[Tuple], Iterable[Tuple]],
-                             nproc: Optional[int] = None,
-                             batch_size: Optional[int] = None,
-                             only_cui: bool = False,
-                             addl_info: List[str] = [],
-                             return_dict: bool = True,
-                             batch_factor: int = 2) -> Union[List[Tuple], Dict]:
-        return self.multiprocessing_batch_docs_size(in_data=in_data, nproc=nproc,
-                                                    batch_size=batch_size,
-                                                    only_cui=only_cui,
-                                                    addl_info=addl_info,
-                                                    return_dict=return_dict,
-                                                    batch_factor=batch_factor)
-
     def multiprocessing_batch_docs_size(self,
                                         in_data: Union[List[Tuple], Iterable[Tuple]],
                                         nproc: Optional[int] = None,

diff --git a/medcat/cdb.py b/medcat/cdb.py
@@ -12,8 +12,8 @@
 from medcat.utils.hasher import Hasher
 from medcat.utils.matutils import unitvec
 from medcat.utils.ml_utils import get_lr_linking
-from medcat.utils.decorators import deprecated
 from medcat.config import Config, workers
+from medcat.utils.decorators import deprecated
 from medcat.utils.saving.serializer import CDBSerializer
 from medcat.utils.config_utils import get_and_del_weighted_average_from_config
 from medcat.utils.config_utils import default_weighted_average
@@ -252,45 +252,6 @@ def add_names(self, cui: str, names: Dict[str, Dict], name_status: str = 'A', fu
 
         self._add_concept(cui=cui, names=names, ontologies=set(), name_status=name_status, type_ids=set(), description='', full_build=full_build)
 
-    @deprecated("Use `cdb._add_concept` as this will be removed in a future release.",
-                depr_version=(1, 10, 0), removal_version=(1, 12, 0))
-    def add_concept(self,
-                    cui: str,
-                    names: Dict[str, Dict],
-                    ontologies: Set[str],
-                    name_status: str,
-                    type_ids: Set[str],
-                    description: str,
-                    full_build: bool = False) -> None:
-        """
-        Deprecated: Use `cdb._add_concept` as this will be removed in a future release.
-
-        Add a concept to internal Concept Database (CDB). Depending on what you are providing
-        this will add a large number of properties for each concept.
-
-        Args:
-            cui (str):
-                Concept ID or unique identifier in this database, all concepts that have
-                the same CUI will be merged internally.
-            names (Dict[str, Dict]):
-                Names for this concept, or the value that if found in free text can be linked to this concept.
-                Names is a dict like: `{name: {'tokens': tokens, 'snames': snames, 'raw_name': raw_name}, ...}`
-                Names should be generated by helper function 'medcat.preprocessing.cleaners.prepare_name'
-            ontologies (Set[str]):
-                ontologies in which the concept exists (e.g. SNOMEDCT, HPO)
-            name_status (str):
-                One of `P`, `N`, `A`
-            type_ids (Set[str]):
-                Semantic type identifier (have a look at TUIs in UMLS or SNOMED-CT)
-            description (str):
-                Description of this concept.
-            full_build (bool):
-                If True the dictionary self.addl_info will also be populated, contains a lot of extra information
-                about concepts, but can be very memory consuming. This is not necessary
-                for normal functioning of MedCAT (Default Value `False`).
-        """
-        self._add_concept(cui, names, ontologies, name_status, type_ids, description, full_build)
-
     def _add_concept(self,
                     cui: str,
                     names: Dict[str, Dict],

diff --git a/medcat/meta_cat.py b/medcat/meta_cat.py
@@ -15,7 +15,6 @@
 from medcat.pipeline.pipe_runner import PipeRunner
 from medcat.tokenizers.meta_cat_tokenizers import TokenizerWrapperBase
 from medcat.utils.meta_cat.data_utils import Doc as FakeDoc
-from medcat.utils.decorators import deprecated
 from peft import get_peft_model, LoraConfig, TaskType
 
 # It should be safe to do this always, as all other multiprocessing
@@ -121,26 +120,6 @@ def get_hash(self) -> str:
         hasher.update(self.config.get_hash())
         return hasher.hexdigest()
 
-    @deprecated(message="Use `train_from_json` or `train_raw` instead",
-                depr_version=(1, 8, 0), removal_version=(1, 12, 0))
-    def train(self, json_path: Union[str, list], save_dir_path: Optional[str] = None, data_oversampled: Optional[list] = None) -> Dict:
-        """Train or continue training a model give a json_path containing a MedCATtrainer export. It will
-        continue training if an existing model is loaded or start new training if the model is blank/new.
-
-        Args:
-            json_path (Union[str, list]):
-                Path/Paths to a MedCATtrainer export containing the meta_annotations we want to train for.
-            save_dir_path (Optional[str]):
-                In case we have aut_save_model (meaning during the training the best model will be saved)
-                we need to set a save path. Defaults to `None`.
-            data_oversampled (Optional[list]):
-                In case of oversampling being performed, the data will be passed in the parameter
-
-        Returns:
-            Dict: The resulting report.
-        """
-        return self.train_from_json(json_path, save_dir_path, data_oversampled=data_oversampled)
-
     def train_from_json(self, json_path: Union[str, list], save_dir_path: Optional[str] = None,
                         data_oversampled: Optional[list] = None) -> Dict:
         """Train or continue training a model give a json_path containing a MedCATtrainer export. It will

diff --git a/medcat/utils/ner/__init__.py b/medcat/utils/ner/__init__.py
@@ -1,2 +1,2 @@
 from .metrics import metrics
-from .helpers import deid_text, make_or_update_cdb
+from .helpers import make_or_update_cdb
diff --git a/medcat/utils/ner/deid.py b/medcat/utils/ner/deid.py
@@ -40,7 +40,7 @@
 from medcat.cat import CAT
 from medcat.utils.ner.model import NerModel
 
-from medcat.utils.ner.helpers import _deid_text as deid_text, replace_entities_in_text
+from medcat.utils.ner.helpers import replace_entities_in_text
 
 
 logger = logging.getLogger(__name__)
@@ -69,15 +69,21 @@ def train(self, json_path: Union[str, list, None],
     def deid_text(self, text: str, redact: bool = False) -> str:
         """Deidentify text and potentially redact information.
 
+        Returns the de-identified text.
+        If redaction is enabled, identifiable entities will be
+        replaced with stars (e.g. `*****`).
+        Otherwise, the replacement will be the CUI or in other words,
+        the type of information that was hidden (e.g. [PATIENT]).
+
         Args:
             text (str): The text to deidentify.
             redact (bool): Whether to redact the information.
 
         Returns:
             str: The deidentified text.
         """
-        self.cat.get_entities
-        return deid_text(self.cat, text, redact=redact)
+        entities = self.cat.get_entities(text)['entities']
+        return replace_entities_in_text(text, entities, self.cat.cdb.get_name, redact=redact)
 
     def deid_multi_texts(self,
                          texts: Union[Iterable[str], Iterable[Tuple]],

diff --git a/medcat/utils/ner/helpers.py b/medcat/utils/ner/helpers.py
@@ -3,35 +3,6 @@
 from medcat.utils.data_utils import count_annotations
 from medcat.cdb import CDB
 
-from medcat.utils.decorators import deprecated
-
-
-# For now, we will keep this method separate from the above class
-# This is so that we wouldn't need to create a thorwaway object
-# when calling the method from .helpers where it used to be.
-# After the deprecated method in .helpers is removed, we can
-# move this to a proper class method.
-def _deid_text(cat, text: str, redact: bool = False) -> str:
-    """De-identify text.
-
-    De-identified text.
-    If redaction is enabled, identifiable entities will be
-    replaced with starts (e.g `*****`).
-    Otherwise, the replacement will be the CUI or in other words,
-    the type of information that was hidden (e.g [PATIENT]).
-
-
-    Args:
-        cat (CAT): The CAT object to use for deid.
-        text (str): The input document.
-        redact (bool): Whether to redact. Defaults to False.
-
-    Returns:
-        str: The de-identified document.
-    """
-    entities = cat.get_entities(text)['entities']
-    return replace_entities_in_text(text, entities, cat.cdb.get_name, redact=redact)
-
 
 def replace_entities_in_text(text: str,
                              entities: Dict,
@@ -45,14 +16,6 @@ def replace_entities_in_text(text: str,
     return new_text
 
 
-@deprecated("API now allows creating a DeId model (medcat.utils.ner.deid.DeIdModel). "
-            "It aims to simplify the usage of DeId models. "
-            "The use of this model is encouraged over the use of this method.",
-            depr_version=(1, 8, 0), removal_version=(1, 12, 0))
-def deid_text(*args, **kwargs) -> str:
-    return _deid_text(*args, **kwargs)
-
-
 def make_or_update_cdb(json_path: str, cdb: Optional[CDB] = None,
                        min_count: int = 0) -> CDB:
     """Creates a new CDB or updates an existing one with new