Skip to content

Commit

Permalink
Remove experimental flag from Dataset.from_arrays() methods
Browse files Browse the repository at this point in the history
Signed-off-by: Christopher Schröder <chschroeder@users.noreply.github.com>
  • Loading branch information
chschroeder committed May 1, 2024
1 parent 6e80bd4 commit 893dae6
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 23 deletions.
9 changes: 0 additions & 9 deletions small_text/data/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from small_text.base import LABEL_UNLABELED
from small_text.data.exceptions import UnsupportedOperationException

from small_text.utils.annotations import experimental
from small_text.utils.data import list_length
from small_text.utils.labels import get_flattened_unique_labels

Expand Down Expand Up @@ -460,7 +459,6 @@ def clone(self):
return SklearnDataset(x, y, target_labels=target_labels)

@classmethod
@experimental
def from_arrays(cls, texts, y, vectorizer, target_labels=None, train=True):
"""Constructs a new SklearnDataset from the given text and label arrays.
Expand Down Expand Up @@ -491,9 +489,6 @@ def from_arrays(cls, texts, y, vectorizer, target_labels=None, train=True):
<https://scikit-learn.org/stable/modules/classes.html
#module-sklearn.feature_extraction.text>`_
.. warning::
This functionality is still experimental and may be subject to change.
.. versionadded:: 1.1.0
"""
if train:
Expand Down Expand Up @@ -617,7 +612,6 @@ def clone(self):
return TextDataset(x, y, target_labels=target_labels)

@classmethod
@experimental
def from_arrays(cls, texts, y, target_labels=None):
"""Constructs a new TextDataset from the given text and label arrays.
Expand All @@ -643,9 +637,6 @@ def from_arrays(cls, texts, y, target_labels=None):
<https://scikit-learn.org/stable/modules/classes.html
#module-sklearn.feature_extraction.text>`_
.. warning::
This functionality is still experimental and may be subject to change.
.. versionadded:: 1.2.0
"""

Expand Down
10 changes: 3 additions & 7 deletions small_text/integrations/pytorch/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from small_text.data import DatasetView
from small_text.data.datasets import check_size, check_target_labels, get_updated_target_labels
from small_text.data.exceptions import UnsupportedOperationException
from small_text.utils.annotations import experimental
from small_text.utils.labels import csr_to_list, get_num_labels, list_to_csr


Expand Down Expand Up @@ -319,7 +318,6 @@ def to(self, other, non_blocking=False, copy=False):
return self

@classmethod
@experimental
def from_arrays(cls, texts, y, tokenizer, target_labels=None, max_length=512):
"""Constructs a new PytorchTextClassificationDataset from the given text and label arrays.
Expand All @@ -331,8 +329,9 @@ def from_arrays(cls, texts, y, tokenizer, target_labels=None, max_length=512):
List of labels where each label belongs to the features of the respective row.
Depending on the type of `y` the resulting dataset will be single-label (`np.ndarray`)
or multi-label (`scipy.sparse.csr_matrix`).
tokenizer : small_text.data.tokenizers.Tokenizer
A tokenizer that is used to convert each of the given text documents into tokens.
tokenizer : tokenizers.Tokenizer
A tokenizer from the tokenizers library that is used to convert each of the given text documents
into tokens.
target_labels : numpy.ndarray[int] or None, default=None
List of possible labels. Will be directly passed to the dataset constructor.
max_length : int
Expand All @@ -344,9 +343,6 @@ def from_arrays(cls, texts, y, tokenizer, target_labels=None, max_length=512):
A dataset constructed from the given texts and labels.
.. warning::
This functionality is still experimental and may be subject to change.
.. versionadded:: 1.1.0
.. versionchanged:: 2.0.0
"""
Expand Down
10 changes: 3 additions & 7 deletions small_text/integrations/transformers/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from small_text.base import LABEL_UNLABELED
from small_text.data.datasets import check_size, check_target_labels, get_updated_target_labels
from small_text.integrations.pytorch.exceptions import PytorchNotFoundError
from small_text.utils.annotations import experimental
from small_text.utils.labels import csr_to_list, get_num_labels, list_to_csr

try:
Expand Down Expand Up @@ -214,7 +213,6 @@ def to(self, other, non_blocking=False, copy=False):
return self

@classmethod
@experimental
def from_arrays(cls, texts, y, tokenizer, target_labels=None, max_length=512):
"""Constructs a new TransformersDataset from the given text and label arrays.
Expand All @@ -227,9 +225,10 @@ def from_arrays(cls, texts, y, tokenizer, target_labels=None, max_length=512):
Depending on the type of `y` the resulting dataset will be single-label (`np.ndarray`)
or multi-label (`scipy.sparse.csr_matrix`).
tokenizer : tokenizers.Tokenizer
A huggingface tokenizer.
A tokenizer from the tokenizers library that is used to convert each of the given text documents
into tokens.
target_labels : numpy.ndarray[int] or None, default=None
List of possible labels. Will be directly passed to the datset constructor.
List of possible labels. Will be directly passed to the dataset constructor.
max_length : int
Maximum sequence length.
Expand All @@ -239,9 +238,6 @@ def from_arrays(cls, texts, y, tokenizer, target_labels=None, max_length=512):
A dataset constructed from the given texts and labels.
.. warning::
This functionality is still experimental and may be subject to change.
.. versionadded:: 1.1.0
"""
data_out = []
Expand Down

0 comments on commit 893dae6

Please sign in to comment.