From 5554fb4197f730205456baafe960dae4676d0641 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christopher=20Schr=C3=B6der?=
 <chschroeder@users.noreply.github.com>
Date: Sat, 20 Apr 2024 23:45:39 +0200
Subject: [PATCH 1/3] Update showcase section in docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Christopher Schröder <chschroeder@users.noreply.github.com>
---
 docs/showcase.rst | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/docs/showcase.rst b/docs/showcase.rst
index 687882a5..ed2980b6 100644
--- a/docs/showcase.rst
+++ b/docs/showcase.rst
@@ -2,9 +2,34 @@
 Showcase
 ========
 
+In this section, we collect publications, tutorials, and other resources that have used small-text.
+
+----
+
+.. contents:: Overview
+   :depth: 1
+   :local:
+   :backlinks: none
+
+----
+
 Papers
 ------
 
+2023
+^^^^
+
+- | David Kartchner, Irfan Al-Hussaini, Haydn Turner, Jennifer Deng, Shubham Lohiya, Prasanth Bathala, and Cassie S. Mitchell. 2023.
+  | `BioSift: A Dataset for Filtering Biomedical Abstracts for Drug Repurposing and Clinical Meta-Analysis. <https://dl.acm.org/doi/10.1145/3539618.3591897>`_
+  | In: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, pages 2913–2923.
+
+- | Klaus Schmidt, Andreas Niekler, Cathleen Kantner, and Manuel Burghardt. 2023.
+  | `Classifying Speech Acts in Political Communication: A Transformer-based Approach with Weak Supervision and Active Learning. <http://dx.doi.org/10.15439/2023F3485>`_
+  | In: Proceedings of the 18th Conference on Computer Science and Intelligence Systems, ACSIS, Vol. 35, pages 739–748.
+
+2022
+^^^^
+
 - | Hannah Kirk, Bertie Vidgen, and Scott Hale. 2022.
   | `Is More Data Better? Re-thinking the Importance of Efficiency in Abusive Language Detection with Transformers-Based Active Learning. <https://aclanthology.org/2022.trac-1.7/>`_
   | In Proceedings of the Third Workshop on Threat, Aggression and Cyberbullying (TRAC 2022), pages 52–61, Gyeongju, Republic of Korea. Association for Computational Linguistics.

From c67d91c3779c97912ed1d6f2215bd5b86c835291 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christopher=20Schr=C3=B6der?=
 <chschroeder@users.noreply.github.com>
Date: Fri, 26 Apr 2024 21:48:58 +0200
Subject: [PATCH 2/3] Fix setfit seed control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Christopher Schröder <chschroeder@users.noreply.github.com>
---
 CHANGELOG.md                                  | 12 +++++++++++
 .../transformers/classifiers/setfit.py        |  2 ++
 .../integrations/transformers/utils/setfit.py |  3 +++
 .../transformers/classifiers/test_setfit.py   | 20 +++++++++++++++++++
 .../transformers/classifiers/test_setfit.py   |  9 +++++++++
 5 files changed, 46 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3677a4af..19e77e2c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # Changelog
 
+## Version 1.4.0 - unreleased
+
+### Fixed
+
+- Changed how the seed is controlled in `SetFitClassification`, since the seed was previously fixed unless explicitly set via the respective trainer keyword argument.
+
+### Changed
+
+- Documentation: Updated showcase section.
+
+---
+
 ## Version 1.3.3 - 2023-12-29
 
 ### Changed
diff --git a/small_text/integrations/transformers/classifiers/setfit.py b/small_text/integrations/transformers/classifiers/setfit.py
index d60ff668..9c2cebcb 100644
--- a/small_text/integrations/transformers/classifiers/setfit.py
+++ b/small_text/integrations/transformers/classifiers/setfit.py
@@ -257,11 +257,13 @@ def _get_train_and_valid_sets(self, x_train, y_train, x_valid, y_valid):
         return sub_train, sub_valid
 
     def _fit(self, sub_train, sub_valid, setfit_train_kwargs):
+        seed = np.random.randint(2**32-1)
         trainer = SetFitTrainer(
             self.model,
             sub_train,
             eval_dataset=sub_valid,
             batch_size=self.mini_batch_size,
+            seed=seed,
             **self.trainer_kwargs
         )
         trainer.train(max_length=self.max_seq_len, **setfit_train_kwargs)
diff --git a/small_text/integrations/transformers/utils/setfit.py b/small_text/integrations/transformers/utils/setfit.py
index 2ff647fc..fea6126b 100644
--- a/small_text/integrations/transformers/utils/setfit.py
+++ b/small_text/integrations/transformers/utils/setfit.py
@@ -22,6 +22,9 @@ def _check_trainer_kwargs(trainer_kwargs):
         raise ValueError('Invalid keyword argument in trainer_kwargs: '
                          'Argument "batch_size" can be set via "mini_batch_size" in '
                          'SetFitClassification.')
+    if 'seed' in trainer_kwargs:
+        raise ValueError('Invalid keyword argument in trainer_kwargs: '
+                         'Argument "seed" cannot be set via trainer_kwargs.')
     return trainer_kwargs
 
 
diff --git a/tests/integration/small_text/integrations/transformers/classifiers/test_setfit.py b/tests/integration/small_text/integrations/transformers/classifiers/test_setfit.py
index 48bff3ef..df6590c6 100644
--- a/tests/integration/small_text/integrations/transformers/classifiers/test_setfit.py
+++ b/tests/integration/small_text/integrations/transformers/classifiers/test_setfit.py
@@ -324,6 +324,26 @@ def test_fit_with_non_default_settings(self):
             self.assertEqual(1, train_mock.call_count)
             self.assertEqual(max_seq_len, train_mock.call_args_list[0].kwargs['max_length'])
 
+    def test_fit_prevent_fixed_seed(self):
+        ds = twenty_news_text(10, num_classes=self.num_classes, multi_label=self.multi_label)
+        num_classes = 5
+
+        setfit_model_args = SetFitModelArguments('sentence-transformers/all-MiniLM-L6-v2')
+        setfit_train_kwargs = {'show_progress_bar': False}
+
+        with patch('setfit.trainer.set_seed') as set_seed_mock:
+            clf = SetFitClassification(setfit_model_args, num_classes, multi_label=self.multi_label)
+
+            clf.fit(ds, setfit_train_kwargs=setfit_train_kwargs)
+            self.assertEqual(1, set_seed_mock.call_count)
+            first_seed = set_seed_mock.call_args_list[0][0]
+
+            clf.fit(ds, setfit_train_kwargs=setfit_train_kwargs)
+            self.assertEqual(2, set_seed_mock.call_count)
+            second_seed = set_seed_mock.call_args_list[1][0]
+
+            self.assertNotEqual(first_seed, second_seed)
+
 
 @pytest.mark.pytorch
 @pytest.mark.optional
diff --git a/tests/unit/small_text/integrations/transformers/classifiers/test_setfit.py b/tests/unit/small_text/integrations/transformers/classifiers/test_setfit.py
index 701772f5..b01db75f 100644
--- a/tests/unit/small_text/integrations/transformers/classifiers/test_setfit.py
+++ b/tests/unit/small_text/integrations/transformers/classifiers/test_setfit.py
@@ -79,6 +79,15 @@ def test_init_with_misplaced_batch_size_kwargs(self):
         with self.assertRaisesRegex(ValueError, 'Invalid keyword argument in trainer_kwargs'):
             SetFitClassification(setfit_model_args, num_classes, trainer_kwargs=trainer_kwargs)
 
+    def test_init_with_misplaced_seed_kwargs(self):
+        setfit_model_args = SetFitModelArguments('sentence-transformers/all-MiniLM-L6-v2')
+        num_classes = 5
+
+        trainer_kwargs = {'seed': 4242}
+
+        with self.assertRaisesRegex(ValueError, 'Invalid keyword argument in trainer_kwargs'):
+            SetFitClassification(setfit_model_args, num_classes, trainer_kwargs=trainer_kwargs)
+
 
 class _SetFitClassification(object):
 

From 668c326a6ea8d32ded5c661bf4f77769c2a22f15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christopher=20Schr=C3=B6der?=
 <chschroeder@users.noreply.github.com>
Date: Fri, 26 Apr 2024 21:50:22 +0200
Subject: [PATCH 3/3] Bump version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Christopher Schröder <chschroeder@users.noreply.github.com>
---
 small_text/version.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/small_text/version.json b/small_text/version.json
index e175b773..6032499f 100644
--- a/small_text/version.json
+++ b/small_text/version.json
@@ -1,6 +1,6 @@
 {
   "major": 1,
-  "minor": 3,
-  "micro": 3,
-  "pre_release": ""
+  "minor": 4,
+  "micro": 0,
+  "pre_release": "dev1"
 }
\ No newline at end of file