diff --git a/notebooks/development/029-deterministic-test.ipynb b/notebooks/development/029-deterministic-test.ipynb
new file mode 100644
index 0000000..0329ace
--- /dev/null
+++ b/notebooks/development/029-deterministic-test.ipynb
@@ -0,0 +1,406 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.svm import SVC\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.model_selection import RepeatedStratifiedKFold\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.metrics import roc_auc_score\n",
+    "\n",
+    "from smote_variants.oversampling import SMOTE\n",
+    "from common_datasets.binary_classification import get_filtered_data_loaders\n",
+    "import common_datasets.binary_classification as binclas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "logger = logging.getLogger('smote_variants')\n",
+    "logger.setLevel(logging.ERROR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(498, 21)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "binclas.load_cm1()['data'].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "classifiers = {\n",
+    "DecisionTreeClassifier: [{'max_depth': md, 'random_state': 5} for md in range(2, 10, 2)],\n",
+    "RandomForestClassifier: [{'max_depth': md, 'random_state': 5} for md in range(2, 10, 2)],\n",
+    "KNeighborsClassifier: [{'n_neighbors': nn} for nn in range(1, 10, 2)],\n",
+    "SVC: [{'C': c, 'probability': True, 'random_state': 5} for c in [0.001, 0.01, 0.1]]\\\n",
+    "            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 2, 'random_state': 5} for c in [0.001, 0.01, 0.1]]\\\n",
+    "            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 3, 'random_state': 5} for c in [0.001, 0.01, 0.1]]\\\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datasets = get_filtered_data_loaders(n_col_bounds=(2, 40),\n",
+    "                                        n_bounds=(10, 500),\n",
+    "                                        n_minority_bounds=(10, 500),\n",
+    "                                        n_from_phenotypes=1,\n",
+    "                                        n_smallest=20)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "smote_params = [\n",
+    "    {'n_neighbors': 3, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-12-14 21:06:31.104964 appendicitis\n",
+      "2023-12-14 21:23:36.600311 bupa\n",
+      "2023-12-14 21:47:51.281804 cleveland-0_vs_4\n",
+      "2023-12-14 22:07:33.462380 CM1\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[6], line 28\u001b[0m\n\u001b[1;32m     26\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m param \u001b[38;5;129;01min\u001b[39;00m cparams:\n\u001b[1;32m     27\u001b[0m     classifier_obj \u001b[38;5;241m=\u001b[39m classifier(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparam)\n\u001b[0;32m---> 28\u001b[0m     \u001b[43mclassifier_obj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_samp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_samp\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     29\u001b[0m     y_pred \u001b[38;5;241m=\u001b[39m classifier_obj\u001b[38;5;241m.\u001b[39mpredict_proba(X_test)\n\u001b[1;32m     30\u001b[0m     auc \u001b[38;5;241m=\u001b[39m roc_auc_score(y_test, y_pred[:, \u001b[38;5;241m1\u001b[39m])\n",
+      "File \u001b[0;32m~/anaconda3/envs/smote-variants/lib/python3.10/site-packages/sklearn/base.py:1152\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[0;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1145\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[1;32m   1147\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[1;32m   1148\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m   1149\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[1;32m   1150\u001b[0m     )\n\u001b[1;32m   1151\u001b[0m ):\n\u001b[0;32m-> 1152\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/smote-variants/lib/python3.10/site-packages/sklearn/svm/_base.py:250\u001b[0m, in \u001b[0;36mBaseLibSVM.fit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m    247\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[LibSVM]\u001b[39m\u001b[38;5;124m\"\u001b[39m, end\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    249\u001b[0m seed \u001b[38;5;241m=\u001b[39m rnd\u001b[38;5;241m.\u001b[39mrandint(np\u001b[38;5;241m.\u001b[39miinfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mi\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mmax)\n\u001b[0;32m--> 250\u001b[0m \u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msolver_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkernel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_seed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseed\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    251\u001b[0m \u001b[38;5;66;03m# see comment on the other call to np.iinfo in this file\u001b[39;00m\n\u001b[1;32m    253\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mshape_fit_ \u001b[38;5;241m=\u001b[39m X\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(X, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mshape\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m (n_samples,)\n",
+      "File \u001b[0;32m~/anaconda3/envs/smote-variants/lib/python3.10/site-packages/sklearn/svm/_base.py:329\u001b[0m, in \u001b[0;36mBaseLibSVM._dense_fit\u001b[0;34m(self, X, y, sample_weight, solver_type, kernel, random_seed)\u001b[0m\n\u001b[1;32m    315\u001b[0m libsvm\u001b[38;5;241m.\u001b[39mset_verbosity_wrap(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose)\n\u001b[1;32m    317\u001b[0m \u001b[38;5;66;03m# we don't pass **self.get_params() to allow subclasses to\u001b[39;00m\n\u001b[1;32m    318\u001b[0m \u001b[38;5;66;03m# add other parameters to __init__\u001b[39;00m\n\u001b[1;32m    319\u001b[0m (\n\u001b[1;32m    320\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msupport_,\n\u001b[1;32m    321\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msupport_vectors_,\n\u001b[1;32m    322\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_n_support,\n\u001b[1;32m    323\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdual_coef_,\n\u001b[1;32m    324\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mintercept_,\n\u001b[1;32m    325\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_probA,\n\u001b[1;32m    326\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_probB,\n\u001b[1;32m    327\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit_status_,\n\u001b[1;32m    328\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_num_iter,\n\u001b[0;32m--> 329\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[43mlibsvm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    330\u001b[0m \u001b[43m    \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    331\u001b[0m \u001b[43m    \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    332\u001b[0m \u001b[43m    
\u001b[49m\u001b[43msvm_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msolver_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    333\u001b[0m \u001b[43m    \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    334\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;66;43;03m# TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\u001b[39;49;00m\n\u001b[1;32m    335\u001b[0m \u001b[43m    \u001b[49m\u001b[43mclass_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m_class_weight\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mempty\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    336\u001b[0m \u001b[43m    \u001b[49m\u001b[43mkernel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkernel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    337\u001b[0m \u001b[43m    \u001b[49m\u001b[43mC\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mC\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    338\u001b[0m \u001b[43m    \u001b[49m\u001b[43mnu\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnu\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    339\u001b[0m \u001b[43m    \u001b[49m\u001b[43mprobability\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprobability\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    340\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mdegree\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdegree\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    341\u001b[0m \u001b[43m    \u001b[49m\u001b[43mshrinking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshrinking\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    342\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtol\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    343\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcache_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcache_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    344\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcoef0\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoef0\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    345\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgamma\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gamma\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    346\u001b[0m \u001b[43m    \u001b[49m\u001b[43mepsilon\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mepsilon\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    347\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmax_iter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    348\u001b[0m \u001b[43m    \u001b[49m\u001b[43mrandom_seed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_seed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    349\u001b[0m 
\u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    351\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_warn_from_fit_status()\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "for data_loader in datasets:\n",
+    "    results = []\n",
+    "    dataset = data_loader()\n",
+    "    print(datetime.datetime.now(), dataset['name'])\n",
+    "    X = dataset['data']\n",
+    "    y = dataset['target']\n",
+    "\n",
+    "    validator = RepeatedStratifiedKFold(n_splits=5, n_repeats=20, random_state=5)\n",
+    "\n",
+    "    for fidx, (train, test) in enumerate(validator.split(X, y, y)):\n",
+    "        X_train = X[train]\n",
+    "        X_test = X[test]\n",
+    "        y_train = y[train]\n",
+    "        y_test = y[test]\n",
+    "\n",
+    "        ss = StandardScaler()\n",
+    "        ss.fit(X_train)\n",
+    "        X_train = ss.transform(X_train)\n",
+    "        X_test = ss.transform(X_test)\n",
+    "\n",
+    "        for sparam in smote_params:\n",
+    "            oversampling = SMOTE(**sparam)\n",
+    "            X_samp, y_samp = oversampling.sample(X_train, y_train)\n",
+    "\n",
+    "            for classifier, cparams in classifiers.items():\n",
+    "                for param in cparams:\n",
+    "                    classifier_obj = classifier(**param)\n",
+    "                    classifier_obj.fit(X_samp, y_samp)\n",
+    "                    y_pred = classifier_obj.predict_proba(X_test)\n",
+    "                    auc = roc_auc_score(y_test, y_pred[:, 1])\n",
+    "                    results.append({'name': dataset['name'],\n",
+    "                                    'fold': fidx,\n",
+    "                                    'sparam': sparam,\n",
+    "                                    'classifier': classifier.__name__,\n",
+    "                                    'cparam': param,\n",
+    "                                    'auc': auc})\n",
+    "    data = pd.DataFrame.from_dict(results)\n",
+    "    data.to_csv(f'{dataset[\"name\"]}.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>fold</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.582353</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.723529</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.764706</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.782353</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.976471</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>441</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.647059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>442</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.661765</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>443</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.661765</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>444</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.647059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>445</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.676471</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>446 rows × 6 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             name  fold                                             sparam  \\\n",
+       "0    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "1    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "2    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "3    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "4    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "..            ...   ...                                                ...   \n",
+       "441  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "442  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "443  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "444  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "445  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "\n",
+       "                 classifier  \\\n",
+       "0    DecisionTreeClassifier   \n",
+       "1    DecisionTreeClassifier   \n",
+       "2    DecisionTreeClassifier   \n",
+       "3    DecisionTreeClassifier   \n",
+       "4    RandomForestClassifier   \n",
+       "..                      ...   \n",
+       "441  DecisionTreeClassifier   \n",
+       "442  DecisionTreeClassifier   \n",
+       "443  DecisionTreeClassifier   \n",
+       "444  RandomForestClassifier   \n",
+       "445  RandomForestClassifier   \n",
+       "\n",
+       "                                                cparam       auc  \n",
+       "0    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.582353  \n",
+       "1    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.723529  \n",
+       "2    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.764706  \n",
+       "3    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.782353  \n",
+       "4    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.976471  \n",
+       "..                                                 ...       ...  \n",
+       "441  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.647059  \n",
+       "442  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.661765  \n",
+       "443  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.661765  \n",
+       "444  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.647059  \n",
+       "445  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.676471  \n",
+       "\n",
+       "[446 rows x 6 columns]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tmp = pd.DataFrame.from_dict(results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smote-variants",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/development/030-analysis.ipynb b/notebooks/development/030-analysis.ipynb
new file mode 100644
index 0000000..7c0b46c
--- /dev/null
+++ b/notebooks/development/030-analysis.ipynb
@@ -0,0 +1,1290 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 316,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy.stats import wilcoxon\n",
+    "import common_datasets.binary_classification as binclas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 317,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('haberman.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 318,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>name</th>\n",
+       "      <th>fold</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 2, 'random_state': 5}</td>\n",
+       "      <td>0.604575</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>0.594771</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 6, 'random_state': 5}</td>\n",
+       "      <td>0.639216</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>0.666667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 2, 'random_state': 5}</td>\n",
+       "      <td>0.601961</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Unnamed: 0      name  fold  \\\n",
+       "0           0  haberman     0   \n",
+       "1           1  haberman     0   \n",
+       "2           2  haberman     0   \n",
+       "3           3  haberman     0   \n",
+       "4           4  haberman     0   \n",
+       "\n",
+       "                                              sparam              classifier  \\\n",
+       "0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  DecisionTreeClassifier   \n",
+       "1  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  DecisionTreeClassifier   \n",
+       "2  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  DecisionTreeClassifier   \n",
+       "3  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  DecisionTreeClassifier   \n",
+       "4  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  RandomForestClassifier   \n",
+       "\n",
+       "                                cparam       auc  \n",
+       "0  {'max_depth': 2, 'random_state': 5}  0.604575  \n",
+       "1  {'max_depth': 4, 'random_state': 5}  0.594771  \n",
+       "2  {'max_depth': 6, 'random_state': 5}  0.639216  \n",
+       "3  {'max_depth': 8, 'random_state': 5}  0.666667  \n",
+       "4  {'max_depth': 2, 'random_state': 5}  0.601961  "
+      ]
+     },
+     "execution_count": 318,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 319,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import ast  # literal_eval only parses Python literals; eval() would execute any code stored in the CSV\n",
+    "data['sparam'], data['cparam'] = (data[col].apply(ast.literal_eval) for col in ('sparam', 'cparam'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 320,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def remove_key(dict, key):\n",
+    "    del dict[key]\n",
+    "    return dict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 321,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sampler_params = data['sparam']\n",
+    "# lift the within-simplex sampling mode out of the nested sampler parameters\n",
+    "data['deterministic'] = sampler_params.apply(lambda params: params['ss_params']['within_simplex_sampling'])\n",
+    "# drop 'ss_params' and store both parameter dicts as strings, which are\n",
+    "# hashable and can therefore serve as grouping / merge keys\n",
+    "data['sparam'] = sampler_params.apply(lambda params: str(remove_key(params, 'ss_params')))\n",
+    "data['cparam'] = data['cparam'].apply(str)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 322,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Unnamed: 0', 'name', 'fold', 'sparam', 'classifier', 'cparam', 'auc',\n",
+       "       'deterministic'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 322,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 323,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>name</th>\n",
+       "      <th>fold</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>deterministic</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 2, 'random_state': 5}</td>\n",
+       "      <td>0.604575</td>\n",
+       "      <td>random</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>0.594771</td>\n",
+       "      <td>random</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 6, 'random_state': 5}</td>\n",
+       "      <td>0.639216</td>\n",
+       "      <td>random</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>0.666667</td>\n",
+       "      <td>random</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>haberman</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 2, 'random_state': 5}</td>\n",
+       "      <td>0.601961</td>\n",
+       "      <td>random</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Unnamed: 0      name  fold  \\\n",
+       "0           0  haberman     0   \n",
+       "1           1  haberman     0   \n",
+       "2           2  haberman     0   \n",
+       "3           3  haberman     0   \n",
+       "4           4  haberman     0   \n",
+       "\n",
+       "                                              sparam              classifier  \\\n",
+       "0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  DecisionTreeClassifier   \n",
+       "1  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  DecisionTreeClassifier   \n",
+       "2  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  DecisionTreeClassifier   \n",
+       "3  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  DecisionTreeClassifier   \n",
+       "4  {'n_neighbors': 3, 'proportion': 0.5, 'random_...  RandomForestClassifier   \n",
+       "\n",
+       "                                cparam       auc deterministic  \n",
+       "0  {'max_depth': 2, 'random_state': 5}  0.604575        random  \n",
+       "1  {'max_depth': 4, 'random_state': 5}  0.594771        random  \n",
+       "2  {'max_depth': 6, 'random_state': 5}  0.639216        random  \n",
+       "3  {'max_depth': 8, 'random_state': 5}  0.666667        random  \n",
+       "4  {'max_depth': 2, 'random_state': 5}  0.601961        random  "
+      ]
+     },
+     "execution_count": 323,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 324,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "group_keys = ['name', 'sparam', 'classifier', 'cparam', 'deterministic']\n",
+    "\n",
+    "def fold_ordered_aucs(pdf):\n",
+    "    \"\"\"List the AUC values of one group, ordered by fold.\"\"\"\n",
+    "    return pdf.sort_values('fold')['auc'].values.tolist()\n",
+    "\n",
+    "# one entry per key combination, carrying its per-fold AUCs as a list\n",
+    "grouped = data.groupby(group_keys).apply(fold_ordered_aucs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 325,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grouped = grouped.reset_index(drop=False).rename(columns={0: 'auc'})\n",
+    "\n",
+    "config_keys = ['name', 'sparam', 'classifier', 'cparam']\n",
+    "mode = grouped['deterministic']\n",
+    "determ = grouped[mode == 'deterministic'].drop(columns=['deterministic'])\n",
+    "rand = grouped[mode == 'random'].drop(columns=['deterministic'])\n",
+    "\n",
+    "# put both sampling modes of one configuration on one row: 'auc_det' next to 'auc'\n",
+    "merged = pd.merge(determ.rename(columns={'auc': 'auc_det'}), rand, on=config_keys)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 326,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "summary_stats = (('mean', np.mean), ('std', np.std), ('min', np.min), ('max', np.max))\n",
+    "\n",
+    "# summaries of the per-fold AUC lists; the '_det' columns are created first\n",
+    "for source, suffix in (('auc_det', '_det'), ('auc', '')):\n",
+    "    for stat_name, stat_func in summary_stats:\n",
+    "        merged['auc_' + stat_name + suffix] = merged[source].apply(stat_func)\n",
+    "\n",
+    "def one_sided_pvalues(alternative):\n",
+    "    \"\"\"Row-wise Wilcoxon p-value of 'auc_det' against 'auc' for one alternative.\"\"\"\n",
+    "    return merged.apply(lambda row: wilcoxon(row['auc_det'], row['auc'], zero_method='zsplit', alternative=alternative).pvalue, axis=1)\n",
+    "\n",
+    "merged['p_l'] = one_sided_pvalues('less')\n",
+    "merged['p_g'] = one_sided_pvalues('greater')\n",
+    "# flag p-values below the 0.05 threshold\n",
+    "merged['f_l'] = merged['p_l'] < 0.05\n",
+    "merged['f_g'] = merged['p_g'] < 0.05"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 327,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def model_selection(pdf):\n",
+    "    max_det = pdf[pdf['auc_mean_det'] == pdf['auc_mean_det'].max()].iloc[0]\n",
+    "    max_ran = pdf[pdf['auc_mean'] == pdf['auc_mean'].max()].iloc[0]\n",
+    "    return pd.Series({'auc_mean_det': max_det['auc_mean_det'],\n",
+    "            'auc_mean': max_ran['auc_mean'],\n",
+    "            'auc_std_det': max_det['auc_std_det'],\n",
+    "            'auc_std': max_ran['auc_std'],\n",
+    "            'p_l': wilcoxon(max_det['auc_det'], max_ran['auc'], zero_method='zsplit', alternative='less').pvalue,\n",
+    "            'p_g': wilcoxon(max_det['auc_det'], max_ran['auc'], zero_method='zsplit', alternative='greater').pvalue,\n",
+    "            'auc_median_det': np.median(max_det['auc_det']),\n",
+    "            'auc_median': np.median(max_ran['auc'])})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 328,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>auc_median_det</th>\n",
+       "      <th>auc_median</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>classifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>DecisionTreeClassifier</th>\n",
+       "      <td>0.665536</td>\n",
+       "      <td>0.665514</td>\n",
+       "      <td>0.065766</td>\n",
+       "      <td>0.060871</td>\n",
+       "      <td>0.627707</td>\n",
+       "      <td>0.372293</td>\n",
+       "      <td>0.666319</td>\n",
+       "      <td>0.670139</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>KNeighborsClassifier</th>\n",
+       "      <td>0.643067</td>\n",
+       "      <td>0.643093</td>\n",
+       "      <td>0.071446</td>\n",
+       "      <td>0.069934</td>\n",
+       "      <td>0.446855</td>\n",
+       "      <td>0.553145</td>\n",
+       "      <td>0.646446</td>\n",
+       "      <td>0.649163</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>RandomForestClassifier</th>\n",
+       "      <td>0.710325</td>\n",
+       "      <td>0.708644</td>\n",
+       "      <td>0.060550</td>\n",
+       "      <td>0.062652</td>\n",
+       "      <td>0.909747</td>\n",
+       "      <td>0.090253</td>\n",
+       "      <td>0.711458</td>\n",
+       "      <td>0.711111</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SVC</th>\n",
+       "      <td>0.719221</td>\n",
+       "      <td>0.718938</td>\n",
+       "      <td>0.070995</td>\n",
+       "      <td>0.070885</td>\n",
+       "      <td>0.810147</td>\n",
+       "      <td>0.189853</td>\n",
+       "      <td>0.723897</td>\n",
+       "      <td>0.722917</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        auc_mean_det  auc_mean  auc_std_det   auc_std  \\\n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier      0.665536  0.665514     0.065766  0.060871   \n",
+       "KNeighborsClassifier        0.643067  0.643093     0.071446  0.069934   \n",
+       "RandomForestClassifier      0.710325  0.708644     0.060550  0.062652   \n",
+       "SVC                         0.719221  0.718938     0.070995  0.070885   \n",
+       "\n",
+       "                             p_l       p_g  auc_median_det  auc_median  \n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier  0.627707  0.372293        0.666319    0.670139  \n",
+       "KNeighborsClassifier    0.446855  0.553145        0.646446    0.649163  \n",
+       "RandomForestClassifier  0.909747  0.090253        0.711458    0.711111  \n",
+       "SVC                     0.810147  0.189853        0.723897    0.722917  "
+      ]
+     },
+     "execution_count": 328,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged.groupby('classifier').apply(model_selection)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 329,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>classifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>DecisionTreeClassifier</th>\n",
+       "      <th>155</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>[0.5908496732026143, 0.7548611111111111, 0.653...</td>\n",
+       "      <td>[0.630718954248366, 0.8020833333333333, 0.7270...</td>\n",
+       "      <td>0.665536</td>\n",
+       "      <td>0.065766</td>\n",
+       "      <td>0.474306</td>\n",
+       "      <td>0.807639</td>\n",
+       "      <td>0.660699</td>\n",
+       "      <td>0.073328</td>\n",
+       "      <td>0.434722</td>\n",
+       "      <td>0.826389</td>\n",
+       "      <td>0.915268</td>\n",
+       "      <td>0.084732</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>KNeighborsClassifier</th>\n",
+       "      <th>116</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.5, 'random_...</td>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>{'n_neighbors': 5}</td>\n",
+       "      <td>[0.550326797385621, 0.6277777777777778, 0.5958...</td>\n",
+       "      <td>[0.565359477124183, 0.6069444444444444, 0.5687...</td>\n",
+       "      <td>0.643067</td>\n",
+       "      <td>0.071446</td>\n",
+       "      <td>0.419444</td>\n",
+       "      <td>0.796528</td>\n",
+       "      <td>0.643093</td>\n",
+       "      <td>0.069934</td>\n",
+       "      <td>0.390278</td>\n",
+       "      <td>0.797917</td>\n",
+       "      <td>0.446855</td>\n",
+       "      <td>0.553145</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>RandomForestClassifier</th>\n",
+       "      <th>120</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>[0.5947712418300654, 0.7388888888888889, 0.75,...</td>\n",
+       "      <td>[0.5934640522875817, 0.7486111111111111, 0.730...</td>\n",
+       "      <td>0.710325</td>\n",
+       "      <td>0.060550</td>\n",
+       "      <td>0.491503</td>\n",
+       "      <td>0.873611</td>\n",
+       "      <td>0.707879</td>\n",
+       "      <td>0.059607</td>\n",
+       "      <td>0.516340</td>\n",
+       "      <td>0.866667</td>\n",
+       "      <td>0.984935</td>\n",
+       "      <td>0.015065</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SVC</th>\n",
+       "      <th>150</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>SVC</td>\n",
+       "      <td>{'C': 0.01, 'probability': True, 'random_state...</td>\n",
+       "      <td>[0.6261437908496732, 0.751388888888889, 0.6930...</td>\n",
+       "      <td>[0.615686274509804, 0.75, 0.6958333333333333, ...</td>\n",
+       "      <td>0.719221</td>\n",
+       "      <td>0.070995</td>\n",
+       "      <td>0.462745</td>\n",
+       "      <td>0.851634</td>\n",
+       "      <td>0.718700</td>\n",
+       "      <td>0.070960</td>\n",
+       "      <td>0.458824</td>\n",
+       "      <td>0.849020</td>\n",
+       "      <td>0.928957</td>\n",
+       "      <td>0.071043</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                name  \\\n",
+       "classifier                             \n",
+       "DecisionTreeClassifier 155  haberman   \n",
+       "KNeighborsClassifier   116  haberman   \n",
+       "RandomForestClassifier 120  haberman   \n",
+       "SVC                    150  haberman   \n",
+       "\n",
+       "                                                                       sparam  \\\n",
+       "classifier                                                                      \n",
+       "DecisionTreeClassifier 155  {'n_neighbors': 7, 'proportion': 1.0, 'random_...   \n",
+       "KNeighborsClassifier   116  {'n_neighbors': 5, 'proportion': 1.5, 'random_...   \n",
+       "RandomForestClassifier 120  {'n_neighbors': 5, 'proportion': 1.5, 'random_...   \n",
+       "SVC                    150  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "\n",
+       "                                        classifier  \\\n",
+       "classifier                                           \n",
+       "DecisionTreeClassifier 155  DecisionTreeClassifier   \n",
+       "KNeighborsClassifier   116    KNeighborsClassifier   \n",
+       "RandomForestClassifier 120  RandomForestClassifier   \n",
+       "SVC                    150                     SVC   \n",
+       "\n",
+       "                                                                       cparam  \\\n",
+       "classifier                                                                      \n",
+       "DecisionTreeClassifier 155                {'max_depth': 4, 'random_state': 5}   \n",
+       "KNeighborsClassifier   116                                 {'n_neighbors': 5}   \n",
+       "RandomForestClassifier 120                {'max_depth': 4, 'random_state': 5}   \n",
+       "SVC                    150  {'C': 0.01, 'probability': True, 'random_state...   \n",
+       "\n",
+       "                                                                      auc_det  \\\n",
+       "classifier                                                                      \n",
+       "DecisionTreeClassifier 155  [0.5908496732026143, 0.7548611111111111, 0.653...   \n",
+       "KNeighborsClassifier   116  [0.550326797385621, 0.6277777777777778, 0.5958...   \n",
+       "RandomForestClassifier 120  [0.5947712418300654, 0.7388888888888889, 0.75,...   \n",
+       "SVC                    150  [0.6261437908496732, 0.751388888888889, 0.6930...   \n",
+       "\n",
+       "                                                                          auc  \\\n",
+       "classifier                                                                      \n",
+       "DecisionTreeClassifier 155  [0.630718954248366, 0.8020833333333333, 0.7270...   \n",
+       "KNeighborsClassifier   116  [0.565359477124183, 0.6069444444444444, 0.5687...   \n",
+       "RandomForestClassifier 120  [0.5934640522875817, 0.7486111111111111, 0.730...   \n",
+       "SVC                    150  [0.615686274509804, 0.75, 0.6958333333333333, ...   \n",
+       "\n",
+       "                            auc_mean_det  auc_std_det  auc_min_det  \\\n",
+       "classifier                                                           \n",
+       "DecisionTreeClassifier 155      0.665536     0.065766     0.474306   \n",
+       "KNeighborsClassifier   116      0.643067     0.071446     0.419444   \n",
+       "RandomForestClassifier 120      0.710325     0.060550     0.491503   \n",
+       "SVC                    150      0.719221     0.070995     0.462745   \n",
+       "\n",
+       "                            auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier 155     0.807639  0.660699  0.073328  0.434722   \n",
+       "KNeighborsClassifier   116     0.796528  0.643093  0.069934  0.390278   \n",
+       "RandomForestClassifier 120     0.873611  0.707879  0.059607  0.516340   \n",
+       "SVC                    150     0.851634  0.718700  0.070960  0.458824   \n",
+       "\n",
+       "                             auc_max       p_l       p_g    f_l    f_g  \n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier 155  0.826389  0.915268  0.084732  False  False  \n",
+       "KNeighborsClassifier   116  0.797917  0.446855  0.553145  False  False  \n",
+       "RandomForestClassifier 120  0.866667  0.984935  0.015065  False   True  \n",
+       "SVC                    150  0.849020  0.928957  0.071043  False  False  "
+      ]
+     },
+     "execution_count": 329,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged.groupby('classifier').apply(lambda pdf: pdf[pdf['auc_mean_det'] == pdf['auc_mean_det'].max()])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 330,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>150</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>SVC</td>\n",
+       "      <td>{'C': 0.01, 'probability': True, 'random_state...</td>\n",
+       "      <td>[0.6261437908496732, 0.751388888888889, 0.6930...</td>\n",
+       "      <td>[0.615686274509804, 0.75, 0.6958333333333333, ...</td>\n",
+       "      <td>0.719221</td>\n",
+       "      <td>0.070995</td>\n",
+       "      <td>0.462745</td>\n",
+       "      <td>0.851634</td>\n",
+       "      <td>0.7187</td>\n",
+       "      <td>0.07096</td>\n",
+       "      <td>0.458824</td>\n",
+       "      <td>0.84902</td>\n",
+       "      <td>0.928957</td>\n",
+       "      <td>0.071043</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         name                                             sparam classifier  \\\n",
+       "150  haberman  {'n_neighbors': 7, 'proportion': 0.5, 'random_...        SVC   \n",
+       "\n",
+       "                                                cparam  \\\n",
+       "150  {'C': 0.01, 'probability': True, 'random_state...   \n",
+       "\n",
+       "                                               auc_det  \\\n",
+       "150  [0.6261437908496732, 0.751388888888889, 0.6930...   \n",
+       "\n",
+       "                                                   auc  auc_mean_det  \\\n",
+       "150  [0.615686274509804, 0.75, 0.6958333333333333, ...      0.719221   \n",
+       "\n",
+       "     auc_std_det  auc_min_det  auc_max_det  auc_mean  auc_std   auc_min  \\\n",
+       "150     0.070995     0.462745     0.851634    0.7187  0.07096  0.458824   \n",
+       "\n",
+       "     auc_max       p_l       p_g    f_l    f_g  \n",
+       "150  0.84902  0.928957  0.071043  False  False  "
+      ]
+     },
+     "execution_count": 330,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[merged['auc_mean_det'] == merged['auc_mean_det'].max()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 331,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>classifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>DecisionTreeClassifier</th>\n",
+       "      <th>176</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 1.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 2, 'random_state': 5}</td>\n",
+       "      <td>[0.5215686274509804, 0.8076388888888888, 0.655...</td>\n",
+       "      <td>[0.5875816993464053, 0.8083333333333332, 0.672...</td>\n",
+       "      <td>0.665476</td>\n",
+       "      <td>0.063516</td>\n",
+       "      <td>0.490850</td>\n",
+       "      <td>0.850000</td>\n",
+       "      <td>0.665514</td>\n",
+       "      <td>0.060871</td>\n",
+       "      <td>0.490850</td>\n",
+       "      <td>0.822222</td>\n",
+       "      <td>0.583773</td>\n",
+       "      <td>0.416227</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>KNeighborsClassifier</th>\n",
+       "      <th>116</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.5, 'random_...</td>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>{'n_neighbors': 5}</td>\n",
+       "      <td>[0.550326797385621, 0.6277777777777778, 0.5958...</td>\n",
+       "      <td>[0.565359477124183, 0.6069444444444444, 0.5687...</td>\n",
+       "      <td>0.643067</td>\n",
+       "      <td>0.071446</td>\n",
+       "      <td>0.419444</td>\n",
+       "      <td>0.796528</td>\n",
+       "      <td>0.643093</td>\n",
+       "      <td>0.069934</td>\n",
+       "      <td>0.390278</td>\n",
+       "      <td>0.797917</td>\n",
+       "      <td>0.446855</td>\n",
+       "      <td>0.553145</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>RandomForestClassifier</th>\n",
+       "      <th>164</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>[0.6052287581699347, 0.75, 0.7305555555555555,...</td>\n",
+       "      <td>[0.5830065359477125, 0.7361111111111112, 0.773...</td>\n",
+       "      <td>0.708954</td>\n",
+       "      <td>0.061871</td>\n",
+       "      <td>0.500654</td>\n",
+       "      <td>0.879167</td>\n",
+       "      <td>0.708644</td>\n",
+       "      <td>0.062652</td>\n",
+       "      <td>0.516340</td>\n",
+       "      <td>0.879167</td>\n",
+       "      <td>0.695420</td>\n",
+       "      <td>0.304580</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SVC</th>\n",
+       "      <th>153</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>SVC</td>\n",
+       "      <td>{'C': 0.1, 'probability': True, 'random_state'...</td>\n",
+       "      <td>[0.6235294117647059, 0.7486111111111112, 0.691...</td>\n",
+       "      <td>[0.6143790849673203, 0.75, 0.701388888888889, ...</td>\n",
+       "      <td>0.719117</td>\n",
+       "      <td>0.071229</td>\n",
+       "      <td>0.465359</td>\n",
+       "      <td>0.854248</td>\n",
+       "      <td>0.718938</td>\n",
+       "      <td>0.070885</td>\n",
+       "      <td>0.458824</td>\n",
+       "      <td>0.849020</td>\n",
+       "      <td>0.665400</td>\n",
+       "      <td>0.334600</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                name  \\\n",
+       "classifier                             \n",
+       "DecisionTreeClassifier 176  haberman   \n",
+       "KNeighborsClassifier   116  haberman   \n",
+       "RandomForestClassifier 164  haberman   \n",
+       "SVC                    153  haberman   \n",
+       "\n",
+       "                                                                       sparam  \\\n",
+       "classifier                                                                      \n",
+       "DecisionTreeClassifier 176  {'n_neighbors': 7, 'proportion': 1.5, 'random_...   \n",
+       "KNeighborsClassifier   116  {'n_neighbors': 5, 'proportion': 1.5, 'random_...   \n",
+       "RandomForestClassifier 164  {'n_neighbors': 7, 'proportion': 1.0, 'random_...   \n",
+       "SVC                    153  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "\n",
+       "                                        classifier  \\\n",
+       "classifier                                           \n",
+       "DecisionTreeClassifier 176  DecisionTreeClassifier   \n",
+       "KNeighborsClassifier   116    KNeighborsClassifier   \n",
+       "RandomForestClassifier 164  RandomForestClassifier   \n",
+       "SVC                    153                     SVC   \n",
+       "\n",
+       "                                                                       cparam  \\\n",
+       "classifier                                                                      \n",
+       "DecisionTreeClassifier 176                {'max_depth': 2, 'random_state': 5}   \n",
+       "KNeighborsClassifier   116                                 {'n_neighbors': 5}   \n",
+       "RandomForestClassifier 164                {'max_depth': 4, 'random_state': 5}   \n",
+       "SVC                    153  {'C': 0.1, 'probability': True, 'random_state'...   \n",
+       "\n",
+       "                                                                      auc_det  \\\n",
+       "classifier                                                                      \n",
+       "DecisionTreeClassifier 176  [0.5215686274509804, 0.8076388888888888, 0.655...   \n",
+       "KNeighborsClassifier   116  [0.550326797385621, 0.6277777777777778, 0.5958...   \n",
+       "RandomForestClassifier 164  [0.6052287581699347, 0.75, 0.7305555555555555,...   \n",
+       "SVC                    153  [0.6235294117647059, 0.7486111111111112, 0.691...   \n",
+       "\n",
+       "                                                                          auc  \\\n",
+       "classifier                                                                      \n",
+       "DecisionTreeClassifier 176  [0.5875816993464053, 0.8083333333333332, 0.672...   \n",
+       "KNeighborsClassifier   116  [0.565359477124183, 0.6069444444444444, 0.5687...   \n",
+       "RandomForestClassifier 164  [0.5830065359477125, 0.7361111111111112, 0.773...   \n",
+       "SVC                    153  [0.6143790849673203, 0.75, 0.701388888888889, ...   \n",
+       "\n",
+       "                            auc_mean_det  auc_std_det  auc_min_det  \\\n",
+       "classifier                                                           \n",
+       "DecisionTreeClassifier 176      0.665476     0.063516     0.490850   \n",
+       "KNeighborsClassifier   116      0.643067     0.071446     0.419444   \n",
+       "RandomForestClassifier 164      0.708954     0.061871     0.500654   \n",
+       "SVC                    153      0.719117     0.071229     0.465359   \n",
+       "\n",
+       "                            auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier 176     0.850000  0.665514  0.060871  0.490850   \n",
+       "KNeighborsClassifier   116     0.796528  0.643093  0.069934  0.390278   \n",
+       "RandomForestClassifier 164     0.879167  0.708644  0.062652  0.516340   \n",
+       "SVC                    153     0.854248  0.718938  0.070885  0.458824   \n",
+       "\n",
+       "                             auc_max       p_l       p_g    f_l    f_g  \n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier 176  0.822222  0.583773  0.416227  False  False  \n",
+       "KNeighborsClassifier   116  0.797917  0.446855  0.553145  False  False  \n",
+       "RandomForestClassifier 164  0.879167  0.695420  0.304580  False  False  \n",
+       "SVC                    153  0.849020  0.665400  0.334600  False  False  "
+      ]
+     },
+     "execution_count": 331,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged.groupby('classifier').apply(lambda pdf: pdf[pdf['auc_mean'] == pdf['auc_mean'].max()])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 332,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>153</th>\n",
+       "      <td>haberman</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>SVC</td>\n",
+       "      <td>{'C': 0.1, 'probability': True, 'random_state'...</td>\n",
+       "      <td>[0.6235294117647059, 0.7486111111111112, 0.691...</td>\n",
+       "      <td>[0.6143790849673203, 0.75, 0.701388888888889, ...</td>\n",
+       "      <td>0.719117</td>\n",
+       "      <td>0.071229</td>\n",
+       "      <td>0.465359</td>\n",
+       "      <td>0.854248</td>\n",
+       "      <td>0.718938</td>\n",
+       "      <td>0.070885</td>\n",
+       "      <td>0.458824</td>\n",
+       "      <td>0.84902</td>\n",
+       "      <td>0.6654</td>\n",
+       "      <td>0.3346</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         name                                             sparam classifier  \\\n",
+       "153  haberman  {'n_neighbors': 7, 'proportion': 0.5, 'random_...        SVC   \n",
+       "\n",
+       "                                                cparam  \\\n",
+       "153  {'C': 0.1, 'probability': True, 'random_state'...   \n",
+       "\n",
+       "                                               auc_det  \\\n",
+       "153  [0.6235294117647059, 0.7486111111111112, 0.691...   \n",
+       "\n",
+       "                                                   auc  auc_mean_det  \\\n",
+       "153  [0.6143790849673203, 0.75, 0.701388888888889, ...      0.719117   \n",
+       "\n",
+       "     auc_std_det  auc_min_det  auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "153     0.071229     0.465359     0.854248  0.718938  0.070885  0.458824   \n",
+       "\n",
+       "     auc_max     p_l     p_g    f_l    f_g  \n",
+       "153  0.84902  0.6654  0.3346  False  False  "
+      ]
+     },
+     "execution_count": 332,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[merged['auc_mean'] == merged['auc_mean'].max()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 333,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "auc_mean        0.608057\n",
+       "auc_std         0.077244\n",
+       "auc_mean_det    0.607343\n",
+       "auc_std_det     0.077029\n",
+       "p_l             0.431314\n",
+       "p_g             0.568686\n",
+       "f_l             0.141414\n",
+       "f_g             0.055556\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 333,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[['auc_mean', 'auc_std', 'auc_mean_det', 'auc_std_det', 'p_l', 'p_g', 'f_l', 'f_g']].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 334,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "auc_min        0.381989\n",
+       "auc_max        0.802708\n",
+       "auc_min_det    0.380450\n",
+       "auc_max_det    0.799888\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 334,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[['auc_min', 'auc_max', 'auc_min_det', 'auc_max_det']].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 335,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "auc_mean        0.632868\n",
+       "auc_std         0.070529\n",
+       "auc_mean_det    0.631631\n",
+       "auc_std_det     0.070056\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 335,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[['auc_mean', 'auc_std', 'auc_mean_det', 'auc_std_det']].median()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smote-variants",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/development/031-deterministic-test-joblib-ml.ipynb b/notebooks/development/031-deterministic-test-joblib-ml.ipynb
new file mode 100644
index 0000000..a2fdfbd
--- /dev/null
+++ b/notebooks/development/031-deterministic-test-joblib-ml.ipynb
@@ -0,0 +1,476 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "\n",
+    "from joblib import Parallel, delayed\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.svm import SVC\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.model_selection import RepeatedStratifiedKFold\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.metrics import roc_auc_score\n",
+    "\n",
+    "from smote_variants.oversampling import SMOTE\n",
+    "from common_datasets.binary_classification import get_filtered_data_loaders"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "logger = logging.getLogger('smote_variants')\n",
+    "logger.setLevel(logging.ERROR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "classifiers = {\n",
+    "DecisionTreeClassifier: [{'max_depth': md, 'random_state': 5} for md in range(4, 10, 2)],\n",
+    "RandomForestClassifier: [{'max_depth': md, 'random_state': 5} for md in range(4, 10, 2)],\n",
+    "KNeighborsClassifier: [{'n_neighbors': nn} for nn in range(1, 10, 2)],\n",
+    "SVC: [{'C': c, 'probability': True, 'random_state': 5} for c in [0.001, 0.01, 0.1]]\\\n",
+    "            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 2, 'random_state': 5} for c in [0.001, 0.01, 0.1]]\\\n",
+    "            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 3, 'random_state': 5} for c in [0.001, 0.01, 0.1]]\\\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datasets = get_filtered_data_loaders(n_col_bounds=(2, 40),\n",
+    "                                        n_bounds=(10, 500),\n",
+    "                                        n_minority_bounds=(10, 500),\n",
+    "                                        n_from_phenotypes=1,\n",
+    "                                        n_smallest=20)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "smote_params = [\n",
+    "    #{'n_neighbors': 3, 'proportion': 0.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'id'}},\n",
+    "    #{'n_neighbors': 5, 'proportion': 0.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'id'}},\n",
+    "    #{'n_neighbors': 7, 'proportion': 0.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'id'}},\n",
+    "    #{'n_neighbors': 3, 'proportion': 1.0, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'id'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.0, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'id'}},\n",
+    "    #{'n_neighbors': 7, 'proportion': 1.0, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'id'}},\n",
+    "    #{'n_neighbors': 3, 'proportion': 1.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'id'}},\n",
+    "    #{'n_neighbors': 5, 'proportion': 1.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'id'}},\n",
+    "    #{'n_neighbors': 7, 'proportion': 1.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'id'}},\n",
+    "    #{'n_neighbors': 3, 'proportion': 0.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'n_unique_inv'}},\n",
+    "    #{'n_neighbors': 5, 'proportion': 0.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'n_unique_inv'}},\n",
+    "    #{'n_neighbors': 7, 'proportion': 0.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'n_unique_inv'}},\n",
+    "    #{'n_neighbors': 3, 'proportion': 1.0, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'n_unique_inv'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.0, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'MI_weighted'}},\n",
+    "    #{'n_neighbors': 7, 'proportion': 1.0, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'n_unique_inv'}},\n",
+    "    #{'n_neighbors': 3, 'proportion': 1.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'n_unique_inv'}},\n",
+    "    #{'n_neighbors': 5, 'proportion': 1.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'n_unique_inv'}},\n",
+    "    #{'n_neighbors': 7, 'proportion': 1.5, 'random_state': 5, 'nn_params': {'metric': 'precomputed', 'metric_learning_method': 'n_unique_inv'}},\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for data_loader in datasets:\n",
+    "        results = []"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def job_generator(data_loader):\n",
+    "\n",
+    "    dataset = data_loader()\n",
+    "    print(datetime.datetime.now(), dataset['name'])\n",
+    "\n",
+    "    X = dataset['data']\n",
+    "    y = dataset['target']\n",
+    "\n",
+    "    validator = RepeatedStratifiedKFold(n_splits=5, n_repeats=200, random_state=5)\n",
+    "\n",
+    "    for fidx, (train, test) in enumerate(validator.split(X, y, y)):\n",
+    "        X_train = X[train]\n",
+    "        X_test = X[test]\n",
+    "        y_train = y[train]\n",
+    "        y_test = y[test]\n",
+    "\n",
+    "        ss = StandardScaler()\n",
+    "        ss.fit(X_train)\n",
+    "        X_train = ss.transform(X_train)\n",
+    "        X_test = ss.transform(X_test)\n",
+    "\n",
+    "        for sparam in smote_params:\n",
+    "            oversampling = SMOTE(**sparam)\n",
+    "            X_samp, y_samp = oversampling.sample(X_train, y_train)\n",
+    "\n",
+    "            for classifier, cparams in classifiers.items():\n",
+    "                for param in cparams:\n",
+    "                    job = {\n",
+    "                        'X_samp': X_samp,\n",
+    "                        'y_samp': y_samp,\n",
+    "                        'X_test': X_test,\n",
+    "                        'y_test': y_test,\n",
+    "                        'classifier': classifier,\n",
+    "                        'param': param\n",
+    "                    }\n",
+    "                    description = {\n",
+    "                        'name': dataset['name'],\n",
+    "                        'fold': fidx,\n",
+    "                        'sparam': sparam,\n",
+    "                        'classifier': classifier.__name__,\n",
+    "                        'cparam': param\n",
+    "                    }\n",
+    "                    yield job, description"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def do_job(job, description):\n",
+    "    classifier_obj = job['classifier'](**job['param'])\n",
+    "    classifier_obj.fit(job['X_samp'], job['y_samp'])\n",
+    "    y_pred = classifier_obj.predict_proba(job['X_test'])\n",
+    "    auc = roc_auc_score(job['y_test'], y_pred[:, 1])\n",
+    "    return description | {'auc': auc}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-12-16 11:14:50.048117 appendicitis\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-12-16 11:21:45.349117 bupa\n",
+      "2023-12-16 11:31:09.550091 cleveland-0_vs_4\n",
+      "2023-12-16 11:42:26.786704 CM1\n"
+     ]
+    }
+   ],
+   "source": [
+    "for data_loader in datasets:\n",
+    "    dataset = data_loader()\n",
+    "\n",
+    "    if dataset['name'] in ['iris0', 'dermatology-6']:\n",
+    "        continue\n",
+    "\n",
+    "    results = Parallel(n_jobs=3)(delayed(do_job)(*x) for x in job_generator(data_loader))\n",
+    "    results = pd.DataFrame.from_dict(results)\n",
+    "    results.to_csv(f\"{dataset['name']}-ml.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results.to_csv(f\"{dataset['name']}.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-12-14 21:42:48.609799 appendicitis\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'name': 'appendicitis',\n",
+       " 'fold': 0,\n",
+       " 'sparam': {'n_neighbors': 3,\n",
+       "  'proportion': 0.5,\n",
+       "  'random_state': 5,\n",
+       "  'ss_params': {'within_simplex_sampling': 'random'}},\n",
+       " 'classifier': 'DecisionTreeClassifier',\n",
+       " 'cparam': {'max_depth': 2, 'random_state': 5},\n",
+       " 'auc': 0.5823529411764707}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "do_job(*next(job_generator(datasets[0])))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>fold</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.582353</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.723529</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.764706</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.782353</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.976471</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>441</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.647059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>442</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.661765</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>443</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.661765</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>444</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.647059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>445</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.676471</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>446 rows × 6 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             name  fold                                             sparam  \\\n",
+       "0    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "1    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "2    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "3    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "4    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "..            ...   ...                                                ...   \n",
+       "441  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "442  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "443  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "444  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "445  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "\n",
+       "                 classifier  \\\n",
+       "0    DecisionTreeClassifier   \n",
+       "1    DecisionTreeClassifier   \n",
+       "2    DecisionTreeClassifier   \n",
+       "3    DecisionTreeClassifier   \n",
+       "4    RandomForestClassifier   \n",
+       "..                      ...   \n",
+       "441  DecisionTreeClassifier   \n",
+       "442  DecisionTreeClassifier   \n",
+       "443  DecisionTreeClassifier   \n",
+       "444  RandomForestClassifier   \n",
+       "445  RandomForestClassifier   \n",
+       "\n",
+       "                                                cparam       auc  \n",
+       "0    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.582353  \n",
+       "1    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.723529  \n",
+       "2    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.764706  \n",
+       "3    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.782353  \n",
+       "4    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.976471  \n",
+       "..                                                 ...       ...  \n",
+       "441  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.647059  \n",
+       "442  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.661765  \n",
+       "443  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.661765  \n",
+       "444  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.647059  \n",
+       "445  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.676471  \n",
+       "\n",
+       "[446 rows x 6 columns]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tmp = pd.DataFrame.from_dict(results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smote-variants",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/development/031-deterministic-test-joblib.ipynb b/notebooks/development/031-deterministic-test-joblib.ipynb
new file mode 100644
index 0000000..35f4e05
--- /dev/null
+++ b/notebooks/development/031-deterministic-test-joblib.ipynb
@@ -0,0 +1,480 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "\n",
+    "from joblib import Parallel, delayed\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.svm import SVC\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.model_selection import RepeatedStratifiedKFold\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.metrics import roc_auc_score\n",
+    "\n",
+    "from smote_variants.oversampling import SMOTE\n",
+    "from common_datasets.binary_classification import get_filtered_data_loaders"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "logger = logging.getLogger('smote_variants')\n",
+    "logger.setLevel(logging.ERROR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "classifiers = {\n",
+    "DecisionTreeClassifier: [{'max_depth': md, 'random_state': 5} for md in range(4, 10, 2)],\n",
+    "RandomForestClassifier: [{'max_depth': md, 'random_state': 5} for md in range(4, 10, 2)],\n",
+    "KNeighborsClassifier: [{'n_neighbors': nn} for nn in range(1, 10, 2)],\n",
+    "SVC: [{'C': c, 'probability': True, 'random_state': 5} for c in [0.001, 0.01, 0.1]]\\\n",
+    "            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 2, 'random_state': 5} for c in [0.001, 0.01, 0.1]]\\\n",
+    "            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 3, 'random_state': 5} for c in [0.001, 0.01, 0.1]]\\\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datasets = get_filtered_data_loaders(n_col_bounds=(2, 40),\n",
+    "                                        n_bounds=(10, 500),\n",
+    "                                        n_minority_bounds=(10, 500),\n",
+    "                                        n_from_phenotypes=1,\n",
+    "                                        n_smallest=20)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "smote_params = [\n",
+    "    {'n_neighbors': 3, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'random'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 0.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 1.0, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 3, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 5, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "    {'n_neighbors': 7, 'proportion': 1.5, 'random_state': 5, 'ss_params': {'within_simplex_sampling': 'deterministic'}},\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for data_loader in datasets:\n",
+    "        results = []"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def job_generator(data_loader):\n",
+    "    # Generator: yields one (job, description) pair per CV fold x SMOTE setting x classifier setting.\n",
+    "    dataset = data_loader()\n",
+    "    print(datetime.datetime.now(), dataset['name'])\n",
+    "    if dataset['name'] in ['iris0', 'dermatology-6']:\n",
+    "        return  # skipped dataset: end the generator ('continue' is a SyntaxError outside a loop)\n",
+    "    X = dataset['data']\n",
+    "    y = dataset['target']\n",
+    "\n",
+    "    validator = RepeatedStratifiedKFold(n_splits=5, n_repeats=40, random_state=5)\n",
+    "\n",
+    "    for fidx, (train, test) in enumerate(validator.split(X, y, y)):\n",
+    "        X_train = X[train]\n",
+    "        X_test = X[test]\n",
+    "        y_train = y[train]\n",
+    "        y_test = y[test]\n",
+    "\n",
+    "        ss = StandardScaler()\n",
+    "        ss.fit(X_train)  # scaler is fitted on the training part only, then applied to both parts\n",
+    "        X_train = ss.transform(X_train)\n",
+    "        X_test = ss.transform(X_test)\n",
+    "\n",
+    "        for sparam in smote_params:\n",
+    "            oversampling = SMOTE(**sparam)\n",
+    "            X_samp, y_samp = oversampling.sample(X_train, y_train)  # only the training part is oversampled\n",
+    "\n",
+    "            for classifier, cparams in classifiers.items():\n",
+    "                for param in cparams:\n",
+    "                    job = {\n",
+    "                        'X_samp': X_samp,\n",
+    "                        'y_samp': y_samp,\n",
+    "                        'X_test': X_test,\n",
+    "                        'y_test': y_test,\n",
+    "                        'classifier': classifier,\n",
+    "                        'param': param\n",
+    "                    }\n",
+    "                    description = {\n",
+    "                        'name': dataset['name'],\n",
+    "                        'fold': fidx,\n",
+    "                        'sparam': sparam,\n",
+    "                        'classifier': classifier.__name__,\n",
+    "                        'cparam': param\n",
+    "                    }\n",
+    "                    yield job, description"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def do_job(job, description):\n",
+    "    # Fit one classifier configuration on the job's sampled data and score AUC on its test data.\n",
+    "    model = job['classifier'](**job['param'])\n",
+    "    model.fit(job['X_samp'], job['y_samp'])\n",
+    "    positive_scores = model.predict_proba(job['X_test'])[:, 1]\n",
+    "    return {**description, 'auc': roc_auc_score(job['y_test'], positive_scores)}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-12-14 22:50:59.240523 bupa\n",
+      "2023-12-14 23:21:28.186055 cleveland-0_vs_4\n",
+      "2023-12-14 23:53:18.753317 CM1\n",
+      "2023-12-15 00:54:19.832280 dermatology-6\n",
+      "2023-12-15 01:33:56.591067 ecoli1\n",
+      "2023-12-15 02:09:46.543042 glass0\n",
+      "2023-12-15 02:39:14.222835 haberman\n",
+      "2023-12-15 03:12:29.604080 hepatitis\n",
+      "2023-12-15 03:41:29.622140 ionosphere\n",
+      "2023-12-15 04:13:36.586725 iris0\n",
+      "2023-12-15 04:29:00.888280 led7digit-0-2-4-6-7-8-9_vs_1\n",
+      "2023-12-15 05:02:23.042133 monk-2\n",
+      "2023-12-15 05:38:21.101381 new_thyroid1\n",
+      "2023-12-15 06:05:34.747387 page-blocks-1-3_vs_4\n",
+      "2023-12-15 06:53:06.746426 saheart\n",
+      "2023-12-15 07:30:53.274009 shuttle-6_vs_2-3\n",
+      "2023-12-15 07:58:28.826586 yeast-1_vs_7\n"
+     ]
+    }
+   ],
+   "source": [
+    "for data_loader in datasets:\n",
+    "    dataset = data_loader()\n",
+    "    results = Parallel(n_jobs=3)(delayed(do_job)(*x) for x in job_generator(data_loader))\n",
+    "    results = pd.DataFrame.from_dict(results)\n",
+    "    results.to_csv(f\"{dataset['name']}.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results.to_csv(f\"{dataset['name']}.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-12-14 21:42:48.609799 appendicitis\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'name': 'appendicitis',\n",
+       " 'fold': 0,\n",
+       " 'sparam': {'n_neighbors': 3,\n",
+       "  'proportion': 0.5,\n",
+       "  'random_state': 5,\n",
+       "  'ss_params': {'within_simplex_sampling': 'random'}},\n",
+       " 'classifier': 'DecisionTreeClassifier',\n",
+       " 'cparam': {'max_depth': 2, 'random_state': 5},\n",
+       " 'auc': 0.5823529411764707}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "do_job(*next(job_generator(datasets[0])))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>fold</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.582353</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.723529</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.764706</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.782353</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 3, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.976471</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>441</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.647059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>442</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.661765</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>443</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.661765</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>444</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.647059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>445</th>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>1</td>\n",
+       "      <td>{'n_neighbors': 7, 'proportion': 0.5, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>[{'max_depth': 2, 'random_state': 5}, {'max_de...</td>\n",
+       "      <td>0.676471</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>446 rows × 6 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             name  fold                                             sparam  \\\n",
+       "0    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "1    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "2    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "3    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "4    appendicitis     0  {'n_neighbors': 3, 'proportion': 0.5, 'random_...   \n",
+       "..            ...   ...                                                ...   \n",
+       "441  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "442  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "443  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "444  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "445  appendicitis     1  {'n_neighbors': 7, 'proportion': 0.5, 'random_...   \n",
+       "\n",
+       "                 classifier  \\\n",
+       "0    DecisionTreeClassifier   \n",
+       "1    DecisionTreeClassifier   \n",
+       "2    DecisionTreeClassifier   \n",
+       "3    DecisionTreeClassifier   \n",
+       "4    RandomForestClassifier   \n",
+       "..                      ...   \n",
+       "441  DecisionTreeClassifier   \n",
+       "442  DecisionTreeClassifier   \n",
+       "443  DecisionTreeClassifier   \n",
+       "444  RandomForestClassifier   \n",
+       "445  RandomForestClassifier   \n",
+       "\n",
+       "                                                cparam       auc  \n",
+       "0    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.582353  \n",
+       "1    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.723529  \n",
+       "2    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.764706  \n",
+       "3    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.782353  \n",
+       "4    [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.976471  \n",
+       "..                                                 ...       ...  \n",
+       "441  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.647059  \n",
+       "442  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.661765  \n",
+       "443  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.661765  \n",
+       "444  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.647059  \n",
+       "445  [{'max_depth': 2, 'random_state': 5}, {'max_de...  0.676471  \n",
+       "\n",
+       "[446 rows x 6 columns]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tmp = pd.DataFrame.from_dict(results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smote-variants",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/development/032-smote-rf.ipynb b/notebooks/development/032-smote-rf.ipynb
new file mode 100644
index 0000000..7d9aed8
--- /dev/null
+++ b/notebooks/development/032-smote-rf.ipynb
@@ -0,0 +1,507 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy.stats import wilcoxon\n",
+    "\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.metrics import roc_auc_score\n",
+    "from sklearn.model_selection import RepeatedStratifiedKFold\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.decomposition import PCA\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from conditioning_bias import OperatorRandomForestClassifier, OperatorDecisionTreeClassifier\n",
+    "\n",
+    "from smote_variants.oversampling import SMOTE\n",
+    "\n",
+    "import common_datasets.binary_classification as binclas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = binclas.load_haberman()\n",
+    "X = dataset['data']\n",
+    "y = dataset['target']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = StandardScaler().fit_transform(X)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<matplotlib.collections.PathCollection at 0x7f462b4ea800>"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGdCAYAAADAAnMpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAA9hAAAPYQGoP6dpAABRe0lEQVR4nO3de3wU9b0//tfsJpsEyUYSCUm4BLS0cYlQUKNB8HisaBBBaY96tAhVSzXCqdRjf0hbjdRq5KttqR6Nl1oV+aKi/aF4C021RTmEYkUsGLFCuZUkhCaQDZfcdub7x7Ihm+xcNju3nXk9Hw9qk53MfGYS2Hfm83nNW5AkSQIRERGRRTxWD4CIiIjcjcUIERERWYrFCBEREVmKxQgRERFZisUIERERWYrFCBEREVmKxQgRERFZisUIERERWSrF6gEoEUUR9fX1yMzMhCAIVg+HiIiINJAkCW1tbSgoKIDHo37fw9bFSH19PUaOHGn1MIiIiGgA9u/fjxEjRqhuZ2gxUlVVhaqqKuzZswcAMG7cONx3332YPn26pq/PzMwEED4Zv99v1DCJiIhIR8FgECNHjux5H1djaDEyYsQIPPzwwxg7diwkScKLL76Iq6++Gp9++inGjRun+vWRqRm/389ihIiIKMloXWIhmN0oLzs7G4888ghuvfVW1W2DwSCysrLQ2trKYoSIiChJxPv+bdqakVAohNdeew3Hjh1DaWlpzG06OjrQ0dHR83EwGDRreERERGQRw6O927Ztw+DBg5GWlobbb78da9asQSAQiLltZWUlsrKyev5w8SoREZHzGT5N09nZiX379qG1tRWvv/46fvvb32L9+vUxC5JYd0ZGjhzJaRoiIqIkEu80jelrRi677DKcddZZePrpp1W35ZoRIiKi5BPv+7fpT2AVRTHq7gcRERG5m6ELWJcsWYLp06dj1KhRaGtrw6pVq/DnP/8Z69atM/KwRERElEQMLUaampowd+5cNDQ0ICsrC+PHj8e6deswbdo0Iw9LREREScTQYuS5554zcvfWEkPA3o3A0YPA4GFA4WTA47V6VEREREnH1r1pbKtuLVC9GAjWn/qcvwAoWwYEZlk3LiIioiRk+gLWpFe3Flg9N7oQAYBgQ/jzdWutGRcREVGSYjESDzEUviOCWGnok5+rvie8HREREWnCYiQeezf2vyMSRQKCB8LbERERkSYsRuJx9KC+2xERERGLkbgMHqbvdkRERMRiJC6Fk8OpGQgyGwiAf3h4OyIiItKExUg8PN5wfBdA/4Lk5MdlD/N5I0RERHFgMRKvwCzguhWAPz/68/6C8Of5nBEiIqK48KFnAxGYBRTN4BNYiYiIdMBiZKA8XmDMVKtHQURElPQ4TUNERESWYjFCRERElmIxQkRERJZiMUJERESWYjFCRERElmIxQkRERJZiMUJERESWYjFCRERElmIxQkRERJZiMUJERESWYjFCRERElmIxQkRERJZiMUJERESWYjFCRERElmIxQkRERJZiMUJERESWYjFCRERElmIxQkRERJZiMUJERESWYjFCRERElkqxegC2JIaAvRuBoweBwcOAwsmAx2v1qIiIiByJxUhfdWuB6sVAsP7U5/wFQNkyIDDLunERERE5FKdpeqtbC6yeG12IAECwIfz5urXWjIuIiMjBWIxEiKHwHRFIMV48+bnqe8LbERERkW5YjETs3dj/jkgUCQgeCG9HREREumExEnH0oL7bERERkSYsRiIGD9N3OyIiItKEaZqIwsnh1EywAbHXjQjh1wsna95lSJSweXcLmtrakZuZjpIx2fB6BN2GTERE5AQsRiI83nB8d/VcAAKiC5KTBUTZw5qfN1K9vQFL36pDQ2t7z+fys9JRMTOAsuJ83YZNRESU7DhN01tgFnDdCsDfp1jwF4Q/r/E5I9XbG1C+cktUIQIAja3tKF+5BdXbG/QaMRERUdLjnZG+ArOAohkDfgJ
rSJSw9K062YCwAGDpW3WYFsjjlA0RERFYjMTm8QJjpg7oSzfvbul3R6Q3CUBDazs2725B6Vk5AxwgERGRc3CaRmdNbfKFyEC2IyIicjpX3hkxMuWSm5mu63ZERERO57pixOiUS8mYbORnpaOxtV0uIIy8rHABRERERC6bpjEj5eL1CKiYGQDQEwjuEfm4YmaAi1eJiIhOck0xopZyAcIpl5AYa4v4lBXno2rOJORlRU/F5GWlo2rOJD5nhIiIqBfXTNOYnXIpK87HtEAen8BKRESkwjXFiBUpF69HYHyXiIhIhWuKkXhSLuwpQ0REZB7XFCNaUy6Hj3ViyrIP2FOGiIjIJK5ZwKol5TJrQj4WrGJPGSIiIjO5phgBlFMuT9w4EWs/azAlbUNERESnuGaaJkIu5cKeMkRERNZwXTECxE65sKcMERGRNVw1TaOEPWWIiIisYWgxUllZifPPPx+ZmZnIzc3FNddcgy+//NLIQ2oSEiXU7mrGm1sPoHZXM0Ki1JO2kQvwCginauLqKSOGgN0fAdteD/9XDOkxfCIiIkcxdJpm/fr1WLBgAc4//3x0d3fjJz/5CS6//HLU1dXhtNNOM/LQspQa5VXMDKB85RYIQNRC1gH1lKlbC1QvBoL1pz7nLwDKlgGBWYmeBhERkWMIkiSZFg85dOgQcnNzsX79elx88cWq2weDQWRlZaG1tRV+vz/h40ca5fU94Uh5UTVnEgAk3tW3bi2wei4gd6TrVrAgISIix4r3/dvUBaytra0AgOzs2FMdHR0d6Ojo6Pk4GAzqdmy1RnkCwkXIhsWXJtZTRgyF74goHan6HqBoBuDxDvR0iIiIHMO0BayiKGLRokW46KKLUFxcHHObyspKZGVl9fwZOXKkbsePJ7obSdtc/c3hKD0rJ75Hwe/dGD01E+tIwQPh7YiIiMi8YmTBggXYvn07XnnlFdltlixZgtbW1p4/+/fv1+34pkV3jx7UdzsiIiKHM2WaZuHChXj77bfx4YcfYsSIEbLbpaWlIS0tzZAxmBbdHTxM3+2IiIgcztA7I5IkYeHChVizZg0++OADjBkzxsjDKYpEd5XEHd2NpXByODWjFBL2Dw9vR0RERMYWIwsWLMDKlSuxatUqZGZmorGxEY2NjThx4oSRh43J6xFQPFx5RW/xcH9860Ni8XjD8V0Asi35yh7m4lUiIqKTDC1Gqqqq0NraiksuuQT5+fk9f1599VUjDxtTZ7eI979oUtzm/S+a0NktJn6wwKxwfNffJwrsL2Csl4iIqA9D14yY+AgTVS/V7oFaw11RCm9369QzEz9gYFY4vrt3Y3ix6uBh4akZ3hEhIiKK4ppGeXtbjuu6nSYeLzBmqn77IyIiciDXFCOF2YN6/r8HIko8O5CLI2jC6dgsFkE8OWPVe7tEhURp4A9PIyIicgnXFCM3lY7Gg+9+gWnCZlSkrkCB0NLzWr2UjaVdc1EjleCm0tG6HE+pB47mx8oTERG5gGkPPbOaL8WD+WdsR1XqcuShJeq1PLSgKnU55p+xHb6UxC9JpAdO3ye+Nra2o3zlFlRvb0j4GERERE7hmmKks7ML3ws+BQDoO1MS+Xhe8Gl0dnYldBy1HjhAuAdOSG01LRERkUu4phipqV6DfKGlXyES4RGAAqEZNdVrEjpOPD1wiIiIyEXFyPHmA7puJ8e0HjhEREQO4ZpiZFDOcF23k2NaDxwiIiKHcE0xMq1sNhqkbNkHn4kSUC/lYFrZbE37C4kSanc1482tB1C7q7lnDUikB45CZxp9euAQERE5hGuivT5fKjaO/TFmf7UEohS9iDVSoNSOvRvf8aWq7ksttlsxM4DylVsgAFELWSOHrJgZ4PNGiIiITnLNnREA+M6cO7BmbCUOIvquRCNysGZsJb4z5w7VfWiJ7ZYV56NqziTk9ekSnJeVjqo5k/icESIiol4EyU4NZPoIBoPIyspCa2sr/H7ljrvx6OzsQk31GhxvPoBBOcMxrWw
2fBruiIRECVOWfSCblhEQLjg2LL4UXo/AJ7ASEZErxfv+7Zppmt58vlTMmHVd3F8XT2y39KwceD0CSs/KSWCkREREzueqaZpEMbZLRESkP1feGTnR3olVr72M9sMNSB+SjxuvvQEZ6T7Vr9M9tiuGgL0bgaMHgcHDgMLJ4U6/FnDalJLTzoeIyMlcV4w8WfUrXNP4OG6NNMprAeor78fzef+FO8rvUvzaSGy3sbU95uPeI2tGNMV269YC1YuBYP2pz/kLgLJlQGCW5vPRg9Oa+jntfIiInM5V0zRPVv0Ktzcujdko7/bGpXiy6leKX+/1CKiYGQCAfs8RiSu2W7cWWD03uhABgGBD+PN1a1XORD9Oa+rntPMhInID1xQjJ9o7cU3j4wDkG+Vd0/g/ONHeqbifhGO7Yih8R0SplV71PeHtDOa0pn5OOx8iIrdwzTTNqtdePjU1E4NHAArQjOdeexm33jRPcV9lxfmYFsgb2JqEvRv73xGJIgHBA+HtxkxV318C4k0H2Z3TzoeIyC1cU4y0H9Z2e17rdgOO7R49qO92CXBaOshp50NE5BauKUbSh+QD8jdGorcz0uBh+m6XgN6pHw9ElHh2IBdH0ITTsVksgnhyFi9ZmvqxSSERUXJyTTFy47U3oL7yfuShpd+aESDcn6YRObjx2huMHUjh5HBqJtiA2OtGhPDrhZONHQdOpYMmtH2I+1JXoKDXNFa9lI2fd83FZ5kXJ01TP13TTkREZBrXLGDNSPdhufcWAOjXuTfy8XLvzZqeN5IQjzcc3wUgm8kpe9iU5414PQKenPRPPJm6PGbC6MnU5Xhy0j+T5vkcuqWdiIjIVK4pRk50hrD6+CSUdy1CY4xGeeVdi7D6+CSc6DQ+xYLALOC6FYC/z5SQvyD8ebOeMyKGMPHzhyEIsRNGgiBg4ufLTEn26IVNComIko9rGuXd+8Y2vLRpHwDl9RE3XTgKD1xzTsJj18TqJ7Du/gh48Sr17ea9bXiyR298AisRkXXYKE/GnubjPf9fhAebxIDqdobzeK19k7dRskdvbFJIRJQ8XDNNMzpnkK7bOYKNkj1ERORerilGfnJl7DshA93OESLJnn7LPSMEwD/clGQPERG5l2uKkQyfF9MCuYrbTAvkIsNnTddcS9go2UNERO7lmmIEAM4847SEXnckuyR7iIjItVyTpunsFlF073v9njHSm0cAdjwwHb4UV9VoYVYne4iIyDGYppHxUu0exUIECD/87KXaPbh16pnmDMpOrE72EBGRa7nmFsDeFm2RXa3bERERkT5cc2ekMFtbZLcwe5A7pyzceM5ERGQLrilGbiodjQff/UJ1zcjcIduA5TOAYP2pF/wF4dSJUxdz1q0Fqhe765yJiMg2XDNN40vxIOc05SZ4/5GxBamvz4t+UwbCHXZXzw2/aTtN3drwubnpnImIyFZcU4wcbe/GoaOdsq97IGJR6HeQYjafP/m56nuSqmmcKjEUviPipnMmIiLbcU0x8qNXP1V8vcSzAwVCi+yzSAEJCB4Ir6twir0b+98RieLAcyYiIttxTTGy7/AJxddzcUTbjpKwaZwsBzfKIyKi5OGaBayjhmTgy8Y2AOEpmRLPDuTiCJpwOjaLRWjC6dp25KSmccnaKI/JHyIiR3FNMfLr6yei+P51uMKzGRWpK1AgtPS8Vi9l4+ddc1AvZSNfOAwhxhoKCQIEf4GzmsZFGuUFGxB73YgQft1O58zkDxGR47hmmmZwegquG7QFVanLkYeWqNfy0IInUx9DtTAFkiT1i/+KEiBJEj4dt9hZv4EnW6M8Jn+IiBzJNcVIZ2cXfhT6HYDw80R6i3xcJv0vFnTdiUZkR73eiBzc0bUId2wZgZDaM+WTTbI0ymPyh4jIsVwzTVNTvQYzhBbZ1z0CUIBmHEYmpnQ81m9NiQgP0NqOzbtbUHpWjokjN0FgFlA0w97rMOJJ/rDHDhFRUnFNMXK8+YCm7XJxBCI82CQGYr7
e1Nau57Dsw+6N8pj8ISJyLNdM0wzKGa5pO7VUTW5mug6jobgla/KHiIhUuebOyLSy2Wj4ZDGGoaXfmhEgvEi1ETn4WCyK+fUCgLysdJSMCa8nCXV3Y8df1uHE4QPIGDIcRRdcAW+Kay6n+YxI/rgxIuzGcyYi23PNu6fPl4qNY3+M2V8tgSQBQq+CRDr53vZG3kKE9sa+WSQBqJgZgNcj4NN1L6KgdinGobnn9YM1OagvrcDEK+YZeBYuFkn+rJ6LcGnYuyAZQPLHjRFhN54zESUF10zTAMDfD7bJPu5dAFB/RPkprQDw6boXMWHjDzFUao76/FCpGRM2/hCfrnsx8YFSbHolf9wYEXbjORNR0hAkSbJtVjUYDCIrKwutra3w+/0J7evo8Q4ElxUhT2WaZkrHb8LJmT4EAAX+VPz/nbdjqNQsu48mIQdDf/Z3TtkYKZGpBjEELC9WSOacnO5ZtM050xduPGcislS879+uuTNStSL81NVYRQRwMtorNKPEsyPm6xKAkUc/wzDELkQi+8hDM3b8ZZ0+g6bYIsmfc/4j/N943kDd2BzQjedMREnFNcVId7BR03ZKDfO0NtM7cVhbjJgs4MaIsBvPmYiSimuKkRR/nqbtlKK9WpvpZQzRFiMmC7gxIuzGcyaipOKaYqR87lzUS9n9+s5EiBJQL+Vgs0K0d//gCTiIHMV9NCIHRRdcoc+gSX+RiLDSUmb/cHs1B0zUyXOWZM5ZcuI5E1FScU0xMnhQGh4WvwcAMRvhAcBDobmQ4JFrGYd7Z52D+tIKxX00lFZw8aqdJVtzQD14vPh03D3uagJJREnFNcXI0fZurO06D+Vdi2I2wivvWoS3u8/Hr66bgLys6Kes5mWlo2rOJJQV52PiFfPw2eTHcEiI7k/TJOTgs8mP8TkjySBZmgPqJCRKuGPLCNmffcc2gSSipOGaaO/8Fz9GzRdNAAAPxNiN8ABMOzsXT910HjbvbkFTWztyM8NPXfX2idDwCawO4JKnkdbuasYNz24CoPyz//L8C53XBJKILBHv+7dr3j33HT71QDOlRnj7Dp+A1yOo/qPsTUnBuItm6DpGMpndmwPqpHdzR1c2gSQi2zN0mubDDz/EzJkzUVBQAEEQ8MYbbxh5OEWjhmRo304MAbs/Ara9Hv6vGDJ4dMkhJEqo3dWMN7ceQO2uZt7WTxJamzsmYxNI/kwSOYOhd0aOHTuGCRMm4JZbbsG3v/1tIw+l6tfXT0Tx/eoPI3vsm/8Els9l/44+qrc3YOlbdWhoPfXbc35WOipmBlBWnK/wlWS1kjHZyM9KR2Nru1yLwagmkMmCP5NEzmHonZHp06fjF7/4BWbPnm3kYTQZnJ4Cn1cuzhl2VcrHyFhzM/t39FG9vQHlK7dE/aMPAI2t7ShfuQXV2xssGhlp4fUIqJgZnpqRS4pFmkAmC/5MEjmLrdI0HR0dCAaDUX/00nq8C50h+Vu4Hoj4ifdFSDF/dzz5uep7XDdlExIlLH2rTumqYOlbdbw9bnNlxfmomjNJMSmWLPgzSeQ8tlrAWllZiaVLlxqy71te2Kz4eolnBwqEFoUtevXvcMGix4jNu1v6/fbZmwSgobUdm3e3MIlhc2XF+ZgWyFNNitkdfyaJnMdWxciSJUtw11139XwcDAYxcuRIXfZdr/CPF6C974zb+ndoTVgwiZEctCTF7I4/k0TOY6tiJC0tDWlpaYbsuyArXfG3Ka19Z9zWv8PJSQxKTvyZJHIeW60ZMdLvvlfS8/89EHGhpw6zPBtxoacOHojYLBahXsqW7d9h554lRsYbI0kMhauCfLOTGE6MXjvxnAxiy59JIkqIoXdGjh49ip07d/Z8vHv3bmzduhXZ2dkYNWqUkYfuJ2tQKgpzMlB0eD0qUldErQ+pl7KxtGsunkz/Pn7R8QjC/5z1fkO3b88So+ONkSRG+cotclfF3CRG3VqgerGzotdOPCcD2e5nkogSZuidkb/+9a+YOHEiJk6
cCAC46667MHHiRNx3331GHlbW9YO3oip1OfIQvVA1Dy2oSl2OgtMzkqpniVnxRtskMerWhiPWTopeO/GcTGCbn0ki0oVretOcaO/E4cpvIA8tiPULkyiFm4YNWbIDGT6v7XuWhEQJU5Z9ILsOJvIgqw2LL9XtN8SQKFmXxBBDwPLi/m/aPYRw0bhom+2+V7KceE4ms/RnkohksTeNjFWvvYxbFaK7HgEoQDOee+1l3HrTPNvHd62IN1qaxNi7UeFNG0jK6LUTz8lkTkgHEZGLFrC2H9Y2ZaF1O6u5Lt6oNVKdTNFrJ54TEdEAuObOSPqQfEDpmWa9t7NTa3mZsegdb7T97W6tkepkil478ZyIiAbANcXIjdfegPrK+1XXjMwZn9l/Ht+qZINCyqKkaKZuzc+SouFY4eTwuQcbALkz9hfYMnoty4nnREQ0AK6ZpslI9+Fd8aJwFLDPv/uSFH7zrhPHIG3NLfZINqikLLw73tKl+VnSNBzzeMMFIQDZM7Zh9FqRE8+JiGgAXFOMtB5tx5We/wUACH3+3Y98fInnE3s0yhND4TsiKmMpC+QmFG9MuoZjgVlJFb3WxInnREQUJ9dM0/zyt8/j5wppGkEAUmK+LUeYmGyII2VRVjx1wM3PkrLhWGAWUDTDPmt69ODEcyIiioNrihHdEglmJBviTFkMNN6YtIkcj9d5UVcnnhMRkUbuKUYGD4PWxryq+4HB6ROTUhZsOKYDOyWvbMT26SwishXXFCP//f2bUf/I/0E+WvqtGQHCi1hDEOAVYrfKkyBAOJlsMDx9YlLKItJwTI9Ejiuxp0xMSZHOIiJbcc0C1qzB6fijZwqA2GkaAFgvnAtJCsd8exMlQJIkfDpuMarrmoxPn5iUsog0HFM4ChuOyWFPmZiSJp1FRLbimmIk1N2NK6T/hYTYaRoJwNnSbizouhONiL4T0Igc3NG1CHdsGYH7135uTvrEpJQFG44NgMa0kynJKxtJunQWEdmGa6ZpdvxlHcahuf8tgJMivWkOIxNTOh5DiWcHcnEETTgdm8UiiPAACskTwID0iUkpi7Li/AEnclyJPWViSsp0FhHZgmuKkROHD2jaLhdHIMKDTWJgwMfSNX1iUsqCDcfiwJ4yMSVtOouILOeaaZqMIcM1bdeE0xM+FtMnDseeMjExnUVEA+WaOyNFF1yBgzU5GCo1K/am+Vgsivn1kWSJJEk4GOzQlD5RizeaFX9kzFJn7CkTE9NZJmCUnBzKNcWINyUF9aUVGLrxhxAlRBUkkfV0tWPvRmh77JtFEtCTPClfuSXc46bX633TJ2rxRrPij4xZGiCSdlo9F5D7SXBhT5lIOkvL3w8aAEbJycFcM00DAE8eDKC8a1HMtEx51yK8eOQc1X1oSZ+oxRsr360zJf7ImKWB2FMmJqazDMIoOTmcIEl9n7phH8FgEFlZWWhtbYXf709oXyc6Qzj7vmoAgAdi7LSMgsgt5g2LL4XXI8hOfYRECVOWfSCbKhAQjhLLpRv7HmegtIxDj+O4Hm+bx8SpQR2JIWB5sUKC6+S04KJt/Nkj24j3/ds10zQPvVvX8/8HkpbpG0uUS59oiTcqlX96xR8ZszQJe8rExHSWjhglJxdwzTTNnubjuuxHLZaoV2wx0f0wZknkEIySkwu4phgZnTNIl/1EYokhUULtrma8ufUAanc19zxVUq/YYqL7YcySyCEYJScXcM00zU+uDOClTfsS2kf+yViiUkJlWiAP+VnpilMkHg1rRhKNPzJmSeQQjJKTC7jmzkiGz4uhg33K26QqX45ZE/JRU9eomFCpqWtE8XDlxTrFw/3hhax9Pq9n/JFN8IgcwqTGmURWck0xcqIzhENHO5W36RIVX39zaz3uX6vcCKzize14/4smxf1sPxDE4//5TcPjj4xZEjkEo+TkcK6ZpumdphmoxmCH4usSgINtygUPEJ6iOdjWgQ2LLzU8/sgmeEQOYVLjTCIruKYY0StNo5e9Lcd
Niz8yZknkEIySk0O5phgZnTMIH32lbduBPBQtXoXZGtM9NnqoVlI9yMpG142IiJS5phjRmqa5wrMZFakrUCC09HyuXsrGz7vmYuvgqQAEHAzKJ1RyM304dLRTNi0DhNM0N5WOVh+0jXpRJFWPGxtdNyIiUueaBawZPi9SVM52unczqlKXIw8tUZ/PQwueTF2OqnMP4P5ZygmVpVcXY/7UMYrHmT91DHxqg7FRL4qk6nFjo+tGRETauKYYORTsQLdCWMYDEfemrIAgRHf0BcIfC4KAiZ8vQ1kgVzWhsuTKAG67eEzM/dx28RgsuVLlUfRiKPybvVJup/qe8HYGC4kSlr6lnCBa+lZdz0PfLGWj60ZERNq5Zppm9pMbFF8v8eyImprpS+jV/6GseKpqQmXJlQH89+VFeKl2D/a2HEdh9iDcVDpa/Y4IYKteFEnV48ZG142IiLRzTTHScqxL8fVcHNG2o5P9H7QkVHwpHtw69Uxt+41xDN22S0BS9bix0XUjIiLtXDNNk31aquLrTThd247M6P9go14USdXjxkbXjYiItHNNMbLmjimKr28Wi1AvZUPqtzQ1QgD8w3v6P8g1yosihoDdHwHbXg//V+tahUgvCo1jMVKkx42A8LqaCz11mOXZiAs9dfBAhIBTPXssZ6PrRkRE2rlmmmaoPw3+9BQE27tjvi7Cg0eEm/Er/Aoioqs0EeG3N+Fk/wdNMddE4qWRXhSr5548cu9Cx9xeFJEeN2+segr3yUSer5l5uz2eN2Kj60ZERNq55s4IAFxwpvJv77vO+HeUd96JRil6u0YpB+Wdd6JaPF9bzFWPeKmNelGUeT5Gle83yOuzwDdPaEGV7zco83xs2lhU2ei6ERGRNoIkSTbIZMYWDAaRlZWF1tZW+P3KnXDVnOgM4ez7qjVtG+sJrBI8yMtKhyRJsj1qBAAF/lRsSL8Tgmyq42S770XbtP2GbvWTRMUQsLxYIaUS5/mYxerrRkTkYvG+f7tmmiaeRnkiPNgk9n8WiFLEFQhPCow8+hmETh3jpVb3okjWuKzV142IiDRzzTSNWY3y4o0I2x7jskREZDDX3BmJp1FeIiyJCBs5JeHkuKxJUzl2ajBop7EQEUW4phjR2ihPSf7JNSMHgx2yjfL2D54AKb0ACDaEn9rahwQBgr9Av3ip0U3hInHZYANiP2b95JqRZIvLmtRMz04NBu00FiKi3lwzTZPh86IwJ0NxG59X+TfEoZk+3D9rHAD5Rnn3zjoHW8fdA0mS+nXuFSVAkiR8Om6xPr+Bm9EULhKXBSB71skWlzWpmZ6dGgzaaSxERH25phjp7Baxv+WE8jYh5WDR3/4ZxL99XblR3rRAHu7YMgLlXYvQiD4RYeTgjq5FuGPLiMQby5nZFM5JcVmTrpudGgzaaSxERLG4Zprmpdo9/e5UDMRD79bhgWvOkW2UV7urGQ2t7WhACWo6zusXERbhAfRoLGd2yiUwCyiakfxxWZOum50aDNppLEREsbimGNnbok+aJpLKkWuU17thnFxEuO92A2JFysUJcVmTrpudGgzaaSxERLG4phgpzB6ky35G5yjvR/fGcnKJjzhSLp2dXaipXoPjzQcwKGc4ppXNhs+n3DjQMFY/jMykdFC8PwdGplziGovV3x8iG2MazTiuKUZuKh2NB9/9IuGpmp9cGftOR0SksZzSbXHNjeWUEh9FMzSlXH6/4TNM3jkXMyKPct8LNHyyGBvH/hjfmXOH+hj0ZFKCRZFJ6aDIz0Fja7ts8irv5M+B0SkXzWNp3wAsv8fa7w+RTTGNZizXLGD1pXgwMjuxNM34EX5k+JR/S/R6BMyaoPyDOWtCvno1rZb42PGOasqldtAlmL3zpxiG6J4yw9CC2V8twe9XPqk8Bj2ZlGBRZVI6KNJgUOEoqJgZQE1do+EpFy1jeXLSP+F9bZ713x8iG2IazXiuKUZOdIawtzmxNM2htk7VxEFIlLD2M+UfzLWfNSjvR2v
io2iGbMql69vPY3TDewCAvnVP5OPSrx5FZ2eX4lh1YWbyRwuT0kFlxfmqySuzUi6KY/nuBEz8/GHY5vtDZCNMo5nDNdM08fSmkaMlcaCWXNC0n3gSHzIplz+8/ftTUzMxeASgAM14p3oNZsy6TnG8CbNjfxuT0kFlxfmqySs5eqdcZMeyd4P9vj9ENsE0mjlcU4zo1ZtGLXGgS3Ih3sRHjJTL8eYDmnahdbuE2LW/jUnpIC3JKyV6plxijsWu3x8iG2AazRyumaZRS8FopZZM0CVNo0PiY1DOcE270LpdQpzc3yYBuievBorfHyJZtvl76nCuKUbUUjARAgAPRFzoqcMsz0Zc6KmDByIERKdgQqKE2l3NeHPrAdTuau6ZL4wkFzTtRwwBuz8Ctr0e/m9kTj6S+Oi33LDXKP3DFRMf08pmo0HKlk0PiRJQL+VgWtlsTdcl/EUy41UT7/kM9DhJpvfPSix9f+YMw+8PkSzb/D11ONdM02T4vJgWyEVNXZPsNuNH+JFf/0dUpK5AQa/1FvVSNpZ2zcXsmbfD6xFUI14VMwN4Y9VTuC/Gfn7eNRfXzLwd3h1vKcdcy5aFUwwQEL2wUFviw+dLxcaxP8bsr5ZAlKIXsUYKlNqxd+M7Wp83kkgsN5Jg0XI+doj/miSScilfuUXuqqBiZsD45xjw+0MkyzZ/Tx1OkCTJtkuAg8EgsrKy0NraCr/fn/D+qrc34PaVW2Rfv23odiwOPgQg9pv3U3kVOHPqDShfuaXfyurI5lVzJqHM8zGk1XMhQYq69SQCECBAmPxfwMbH0T+9cHIvkURHzH/4h4ffGDT+w//7lU9i8lePID+qKMoJFyJanzMSieWqjVfLfpTOR6/jJBnbPL+A3x8iWbb5e5ok4n3/dk0xEhIlTFn2geyqaA9EbEj7IfLQ0i8KC4QLkkbk4NspT6DxmBhzHwKAAn8qNqTfCUE2nSAAggBIsffR89CtRdvCv4nq8ETMhJ7AKoaA5cUKaYs+49Wyv1jno/dxkoxtnuzI7w+RLNv8PU0C8b5/mzJN88QTT+CRRx5BY2MjJkyYgMcffxwlJSVmHLqHWjyrxLMjakqlr0gUdvSJ7WhE7PUnEoCRRz+D0KkSk1Ss//rEKHVIfPh8qQOP7+ody5U7HzvGf00kl7gxHb8/RLJs8/fUgQxfwPrqq6/irrvuQkVFBbZs2YIJEybgiiuuQFOT/NoNI6jFrnJxRNN+1LbTuh9VdolRmhX7ZLzU3vj9ISIDGV6M/OpXv8L8+fNx8803IxAI4KmnnsKgQYPwu9/9zuhDR1GLXTXhdE37iWwXKykTz35UnYxRyqV2TGNW7JPxUnvj94eIDGToNE1nZyc++eQTLFmypOdzHo8Hl112GWpra/tt39HRgY6Ojp6Pg8GgbmNRa2C3WSxCvZStumZkT0Yxyk5skk3KbBt8EaT0AoU1IwAEDyRJjBkVkwAIJ2OUtlgwZVJjOdOOQwPD7w8RGcjQOyP/+te/EAqFMGxY9G9Lw4YNQ2NjY7/tKysrkZWV1fNn5MiRuo3F6xFwrKNb9nURHjzQPTf8//v8Wxv5+EX/bagqacKTqcuR16f5XB5a8GTqcvzPuQ0Q8r+pOJbm1PyYS0ckCYAUbnBXXddkj8ZMJjWWM+04NDD8/hCRgWz10LMlS5agtbW158/+/ft123fL0U4E2+WLEQB4L1SC8q5FaET0w2sakYPyrkV4u2Mivvn5wxCE2M3nBEHAxG2VwN+rZY8hAcjuCD+CXeizj8jHhQ3VWPrG3+zTmMmkxnKmHYcGht8fIjKIodM0Z5xxBrxeLw4ejF7UdvDgQeTl5fXbPi0tDWlpaYaM5T+f2ahpu3ViCWo6zkOJZwdycQRNOB2bxSKI8OBClaSMAAk4qnzHQuj5H5nXT6Z2Co//DQ0KqR3TGzOZ1FjOtOPQwPD7Q0QGMLQY8fl8OPfcc/H+++/jmmuuAQCIooj3338fCxcuNPLQ/TS1dWreVoQ
Hm8T+hYBuSRkNtBzL9MZMJjWWM+04NDD8/hCRzgx/zshdd92FefPm4bzzzkNJSQmWL1+OY8eO4eabbzb60FFyM304cqIroX3olpTR6VhaGzM57kE9OjwIjoiI7MPwYuT666/HoUOHcN9996GxsRHf/OY3UV1d3W9Rq9Fe+cFkTPpFTUL72D94wsmkjEKiYHAecOyg7BNWpZP/I6H/uhPgVGpn76DxEI52yx0FeRobM9kikaMn9kYhInIcUxawLly4EHv37kVHRwf+8pe/4IILLjDjsFGyB/vgT1euvVJUrkZgxBAIaomCK/8P8PUy2X0IAOpPOxuAfGqnduzdqLhmvNJRNDVmqt7eYI9Ejl4ivVH6xqaDDeHP1621ZlxERJQQW6VpjBQSJZyWplyMdMu1iznp/S+a0Pn1q5QTBUUzgIativsZnhLEG197EAdjpHbWjK3Ed+bcgbLifFTNmYS8rOipmLys9HAzPpW7GiFRwtK36uyTyEmUGArfEVE6o+p72M6eiCgJmdKbxg7UetNoIUrAS7V7cOtUhUTB7o9UengACB7At2dPQOf1f8c7fRrYfadXA7uy4nxMC+QNaL2H2vlakshJBHujEBE5lmuKEb2SJ3tbjof/j1yiII4eHloa2A20MZPW8zU9kTNQ7I1CRORYrpmm0Zo8UVOYPUh5A5v08NB6vnpdF8PZ5LoSEZH+XHNnJNKbprG1HQLEfg81k+CJuRqhN48A3FQ6GoBCXDaOHh56RW5j7af3+SaayLEF9kYhInIs1xQjXo+AipkBrFn1FCpiNLlb2jUXYtFVqKlrkt3H/Klj4EvxqMdly5aF0x0QEP3GeaqHR3Vdky6RW6WxVMwMoHzlFrlRaErk2EakN8rqm2Q2kNgbhYgoSblmmgYAPl23AlUyTe6qUpdj8D/eU/z6f/zrmLa4rEoPj2rxfF0it2pjAZBQIoeIiMgMgiT17R1rH8FgEFlZWWhtbYXf709oX0ePdyC4rAh5aFF82NiUjt9AVKjRhmWm4WBbR8zXIlMfGxZfGr7jEONJoSF4MGXZB7JJl377kBESJc37AZD8T2AVQ8DyYoVEzclpmkXbeHeEiMhi8b5/u+bOSNWK8NSM3HuwRwAKhGaUeHYo7keuEAGi47LhnZ5M3JzzH+H/erxxRW6VxLOfSCLn6m8OR+lZOclXiADxRXuJiCipuKYY6Q42atpOj2Z4SnFZvSK3jovuqmG0l4jIsVyzgDXFnwccV99Oj2Z4kbhsrJSLXpFbW0Z3jWxgx2gvEZFjuaYYKZ87F/XLHlJdM7JZLFLcz7DMNDS1dajGZeVSLvfOCOgSubVddNfoBnaM9hIROZZrpmkGD0pDtTAlHHPt814mSeE377XdpYqLV/3pKVh69TgAyg3sauoaZVMuC1ZtwawJ+ar7UFvXEYkqJ7ofXZjRwC4S7QUge8aM9hIRJSXXFCMn2jtRJm0AAAh93ssiH89KqYUH8t3ygu3dmPK1oYpx2WmBPNUGdWs/a8ATN05MOHKbaDM9XZjZwE4lMq3LHRgiIjKda6ZpVr32Mm4V5BMqggAUIJym2SQGZLf70auf4tl558s2sKvd1awp5TLktDRsWHxpwpHbRJrp6cLsBnYBhSaFRESUlFxTjLQf1vYgMbU0zb7DJwDIN7CLJ+Uy0CZ4fem1nwGxIuUi16SQiIiSkmuKkfQh+YDyozsAqKdpRg3JACDfmyZZUy4D7pMTb8qluxP4+Fng8B5gyGjg/PlAii+u0yEiImdxTTFy47U3oL7y/oTTNL++fqJiP5hpgbykS7mo9tpREk/K5Q/3ArX/A0i91uX84WdA6ULg8gcGfo5ERJTUXLOANSPdhyfTvw8gXHj0Fvn4odBcxTTN+BF+bNh5SLEfTE1dY1KlXDT12lGiNeXyx/uBjY9FFyJA+OONj4ULFSIiciXXFCOd3SJWBb+J8q5FaET0XYlG5KC8axHeDZ2P4oLBMb9+/Ag/1twxRTUps/StOkw
L5CVFykWqvgcPrN2mej6hvtVbX2opl6+Xhe+IKKl9IjyFQ0REruOaaZqXavdAlIB1UglqOs5DiWcHcnEETTgdm8WinjsisyeOxCs/GIUfvfop9h0+gVFDMvDr6ydicHqK5qTM5t0tSZFyEYIHMLLzMxxA7PRQ7/NRXSCrlHKpfaL/HZF+BwuF15KULlDejoiIHMc1xcjellPPghfhkY3v7m05jsHpKXh23vn9Xou3H0wypFy09OLR3N9GLuVyeI+2r9e6HREROYprpmkKswclvJ0tkzJyNKZctPTiSfh8hozWdzsiInIU19wZual0NB589wuIEpCCbsz1/gGjhCbsk3KxInQ5upECjxDeTk68/WDU4rKd3SJeqt2DvS3HUZg9CDeVjoYvJf76MOZxNKRcJH8B9rdPgBDsMjb5c/78cGpGaapG8Ia3kzsfs6a33M7IZodERDJcU4z4UjyYP3UMhvzvg5if8g68wqm335+m/F882z0Dhy/6qWIxEOkHU75yS7jHTa/X+iZl1OKyle/W4dmPdkclex589wvMnzoGS66UfwJsX4rHKVsWTs3IjFYoexj3iudoOp+EpPjC8d2Nj8lvU7oASPElFjOmxBjd7JCISIYgSX3bxtlHMBhEVlYWWltb4ff7E95f9a/n44ojqwFE96eJXIF1p1+Hsh89q74flTfMSFy274WNHPKyQC5q6ppk93/bxdoKErXjVM2ZhDLPxzHeYIaH47Z6PGckHrGeMyJ4w4XI5Q9oOx8WJMaIxMDlrj57/xBRHOJ9/3ZNMXLi+An4luXDA6lfozwgXJCE4EHX4npkDMpQ3Z/cVEJIlDBl2QeKqRs1HgHY8cB0xbs0aseJTLFsWHwpvBCNewJrvGSewBrX+XDKRl9iCFherJC+OvngukXbOGVDRJrE+/7tmmmaD1Y+iBmCfN0lCEAKRKxb+SBm/OAXqvuTS8ps3t2SUCEChB/C9lLtHtw69UzZbdSO0y+Wq9LLxbTkT4ovZnw37vMh/Zjd7JCIqA/XpGlSW/fqup0czTFYFb2jyIkcR6/xGM1p55NUrGh2SETUi2vujHRlFQLHNG6nhUzqoHcM1gNR9uFqanoixhqOo8QWMWMNnHY+ujMy5RJvs0MiIp25phi5dM5PEVr2P6prRi6d81P1nSmkDkqKZiI/Kx3j2z5EReoKFAinWgXXS9lY2jUX68QSxd33RIw1HMc2DfkSFG9s2lWMTrnE0+yQiMgArpmmyRiUgVdTrwZwKj0TEfl4deos9cWrKs3nvDvewt0jv0RV6nLkoSVqkzy0oCp1OW4bul3xEPOnjoHv72+rHsc2Dfl0EIlNA844H91oaHaYMK3NDrl4lYgM4ppipLNbxM+OXYenu6+C2Ocf3BA8eLr7Kvzs2HXo7FZ4MJeW5nPvLcaUnY8CCN/h6C3y8c1tT+MHU0bFfP22i8dgSdk3VI+D6ntQFsi1viGfjsqK8x11PgnT8POG6nvC2yVKrdkhY71EZCDXRHuf++gfeOCdLwDIP4EVAO6dcbZ8imX3R8CLVyU0jojPp63C2Aumx34Cq9bjzHsbGDPVcU8sddr5DFicPwe64BNYiUgHjPbK6J1O6UYKfhe6UnW7fnRME5w4fAC+FE/swifOdIOlDfkM4LTzGTArUi5yzQ6JiAzkmmKkdwM8pZRLYfYghLq7seMv63Di8AFkDBmOoguugDclRdc0QcaQ4fIvxptu4G+z/Tji7gpTLkTkEq4pRiKN8qYJm2VTLjVSCca3fYh//eLnGIfmntcP1uSgvrQCE6fNUW8+l5mPprYODJWa+60JAcIPNGsSclB0wRXyg40n3cB+Iv04pr8NUy5E5BKuWcDqS/HgPzK2KKZcfpb2Cs79y50YKjVHvT5UasaEjT/EpzUrVVMHwvRlqC+tAICoJni9P24orQjfaZGjNd2w4x3jkxZJJtLfpu/TXBtb21G+cguqtzdYNLIBYMqFiFzCNcXI0eMdWBT6HQD5lMs
86W3F1/NrlyL09StVUwcTr5iHzyY/hkNC9LqHJiEHn01+DBOvmKc+YLV0Q9EM85IWSSIkSlj6Vp3SFcHSt+oQ6lsl2hlTLkTkAq5J0zzy1LP4cePdCY/p82mrMO6iGZrWaciuPYmH3HGsSFrYXO2uZtzw7CbV7V6ef2HyLZDluiAiSiJM08joDjbqsp8Thw+E/4+G1IE3JSVcuCRC7jjsJ9KPo/vbMOVCRA7mmmmaFH+eLvtRTMGYiUmLftjfhogoObmmGCmfOxf1Una/RaURogSEJI/i641QScGYKZK06LewMUIA/MP1TVqIofD00LbXw/+12XqUSH8bhSuC/Dj724RECbW7mvHm1gOo3dVs3HoTm19bIiIjuWaaZvCgNDzuvx2Lgw9BlKIXqUbeX15NnYX/7HpD9vWG0grkxbvmwyiRpMXqm2Q2kPRNWiRBhDjS36Z85RYIiF7aO5D+NqZFhJPg2hIRGck1d0YAAGfPRHnXIjQi+jfjRuSgvGsR/jRqoeLrB4dfbuZo7cOMZm060au/jWkR4SS6tkRERnFNmqazW0TRve+F73ooPIEViP26BA/ystKxYfGl9niSpxgClhf3fxPrcfKBWIu2JXZ3xKzj6CyRJ7CGRAlTln3QrxCJEAB9fhaS9NoSEalhmkbGS7V7eqZbRHiwSQzIbiv3ekNrOzbvbrFHLHTvRoU3MQCQgOCB8HaJpDDMOo7OEulvs3l3i2whAoSnf3T5WUjSa0tEpDfXTNMoNsCLg21ioWZFe10YITYtIuzCa0tEFItr7oz0bpSXgm7M9f4Bo4Qm7JNysSJ0Obo1XopILLSzsws11WtwvPkABuUMx7Sy2fD5UqO2VZsqSKiZm1nR3iSNECdybU2LCCfptSUih7DRwxRdU4xEGuX9f55VmJ/yDrzCqaUyP035v3i2ewYeDt0o+/WRdQIlY7Lx+5VPYvJXj2BGpNneXqDhk8XYOPbH+M6cOwCoJzESTmqY1UQtCZu1JXptIxHhxtZ2uTPu+VlISBJeWyJyCJul+FwzTeNL8eCRrNdxW8rb8PT5h98DCbelvI2HBq+GANmWZKiYGcAbq6ow+6slGNan2d4wtGD2V0vw+5VPqiYxKt+tSzypYVYTtSRr1qZHCiYSEQaUfxYSXsicZNeWiBzChik+1xQjne3tuObEGgCA0Off/cjH13WtxePXBWRjoZd+/QxM/uoRAPLN9Eq/ehRL3/ibYrO2Zz/arU8zN7OaqCVJszY9G+XpFRFWlSTXlogcQgzZssmqa6ZpPvn9IygV5N+EBAFIgYicL1Ziw+KfxVxv8M7a1aemZmLwCEABmlF4/G9oQOy0jgRAKUwdd1IjMCvcwdfoeT+zjpMAvVMwZcX5mBbIG/i6Hq2S4NoSkUPYNMXnmmJEOLxb83ZysdDjzQc07SMXR+IZWkxxJTXMaqJm82ZtRqRgEokIx8Xm15aIHMKmKT7XFCPSkDHAv7RtJ5eUGZQzHNirvo8mnJ7weNnMLX5ObpSXUPKKiCjCpik+1xQj537nxwhV/hIeSP3WjADhqZMQPPhndyZGP/j1mEmZmdfNR8MnizEMLf3WjACnmuntHTQewtFu2SSGIEC2IR8QfzM3CjMtBWMy03rkEJHz2TTF55oFrL70dGz2lQDov2Yj8vEOYQy+848K2aTMW6ufxZq8/wLQv5iIfPxG3kJUXDMegHwS41tn5yqOddaEfP7WOwCmpWBMZFqPHCJyB5um+FxTjHR2dmF051eQEDtNIwE4WwqvK1FKyqxsKVZspvfSkQmYFsiTTWI8ceNEbD8QVBzr2s8ajGtV73CmpWBMoGc6iIiohw1TfIZN0zz44IN45513sHXrVvh8Phw5csSoQ2lSU71GNQkDiIqvF6AZo459hnViCWo6zovdbO9kWkMuiaGW+ABs1gMnCZmWgjGYaT1yiMh9bJbiM6wY6ezsxLXXXovS0lI899xzRh1GM61JGDWRpIxSs71IWiNWEsO0vic
uZ1oKxkD8WSEiQ9koxWdYMbJ06VIAwAsvvKD5azo6OtDR0dHzcTCoPJ0RD61JGDVakjJKaQ0nJz5IX/xZISK3sNWakcrKSmRlZfX8GTlypG77nlY2Gw1StmyKRZSAkORRfL1eysG+0yb0W/ITIUA9CRNJfCSyDyN0dot47qN/4L43t+O5j/6Bzm75KSsyhyU/K2II2P0RsO318H9NfgojEbmTrYqRJUuWoLW1tefP/v37ddu3z5eKjWN/DEA+TfNGxjUA5JMytWPvxn1XnwNg4GkNOyY+Kt+tQ9G97+GBd77Aitq9eOCdL1B073uofLfOtDFQf6b/rNStBZYXAy9eBfz+1vB/lxdb0qeCiNwlrmLknnvugSAIin927Ngx4MGkpaXB7/dH/dFTx56/KP6W2dEtYs3YShyMkZRZM7YS35lzhy5pDTslPirfrcPTH+6OWYA9/eFuFiQWM+1nxYaNs4jIPQRJUuqUEu3QoUNobm5W3ObMM8+Ez+fr+fiFF17AokWLBpSmCQaDyMrKQmtra8KFSWvwGAb/crjqQ8+O/fc/kZHui/kE1t70eCKm1U/V7OwWUXTve4oPYPMIwI4HpsOXYqubaK5j6M+KGArfAZHtV3HyIUiLtrFfDhFpEu/7d1wLWIcOHYqhQ4cOeHBW+v3TFbhFQ6O83z9dgVt+/ChmzLpOcX96pDWsTny8VLtHsRABwndIXqrdg1unnmnOoCgmQ39WbNo4i4jcw7A0zb59+9DS0oJ9+/YhFAph69atAICvfe1rGDx4sFGHlZV54p+6bucEe1uO67odJSmbNs4iIvcwrBi577778OKLL/Z8PHHiRADAn/70J1xyySVGHVZWW8YI4JjG7TSweopFD4XZg3TdjpKUTRtnEZF7xLVmxGxWrRnJ8p+muC+nNC7jmhEC0GvNiErjLK4ZISKN4n3/ds07TJb/NLwozAQgH+1dIVylqRBxSuMyX4oH86eOUdxm/tQxLESczqaNs4jIPVzzLtPZLeIXHf+Jp7uvgtjnH9wQPHi6+yr8ouM/FR/25cTGZUuuDOC2i8fEbA5428VjsOTK2I+8J4exYeMsInIP10zTPPfRP/DAO18AAFLQjbneP2CU0IR9Ui5WhC5H98nlM/fOOFs2OVK7qxk3PLtJ9Vgvz78w6fqidHaLeKl2D/a2HEdh9iDcVDqad0TcSAzZpnEWESUvQ6O9yax3IqQbKfhd6ErV7fpycuMyX4qH8V2yVeMsInIP1xQjvRMhHogo8exALo6gCadjs1gE8eSMlVJyRO/GZU5I5BARESXKNcXITaWj8eC7X2CasBkVqStQILT0vFYvZWNp11zUSCW4qXS07D4ijcv6Ll7tTWvjMqckcoiIiBLlmkUBvhQPbvRvRVXqcuShJeq1PLSgKnU5bvRvVVwn4fUImDVBuVCYNSFf9e6GkxI5REREiXJNMXKivRN3tP8WAGImRwDgjvbncKK9U3YfIVHC2s+UC4W1nzUopmmcmMghIiJKhGuKkVWvvYwCoaVfIRLhEYACoRmrXntZdh+bd7coTtEAQENrOzbvbpF9XW0fkoZ9EBEROYlripH2w9qmPpS20yNN4+REDhER0UC4phhJH6JtUajSdnqkafRO5BARESU716Rpbrz2BtRX3o88xJ6qESWgETm48dobZB8AFknTNLa2y3XwQF6vNE2s6K4e+0jm+K/TzsdW+MAyIkpSrilGMtJ9eD7vv3B741KIUvQi1sha0TfyFqL1g5149qPdUc3jHnz3C8yfGn40esXMAG5fuSXmMSQAFTMD8HoExehuxcwAyldugYDotmSRIWnZRzLGf512PrZStxaoXgwE6099zl8Q7jnDR7kTkc25ZpoGAD49bSrKuxahEdHPAWlEDsq7FuHVo9/E0x/u7tfFVpSApz/cjcp36zQdRy26CwBVcyYhLyt6KiYvKx1VcyahrDjfcfFfp52PrdStBVbPjS5EgHAX3tVzw68TEdmYa3rTnOg
M4ez7qgEoP4FViUcAhg724WBb7PivAGCYPw2AgMZg7AWokWmYDYsvBYCYUxYhUcKUZR/Ipm567yMZpjicdj62IoaA5cX9C5EeQvgOyaJtnLIhItOwN42Mh3rd1RDhwSYx/m60ogTZQgQIT7k0BjsU99E7ult6Vk7MhnrxxH+ToSGf087HVvZuVChEAEACggfC27HnDBHZlGumafY0yzfAs4Kb4r9OOx9bOXpQ3+2IiCzgmjsjo3MG4aOvrB7FKXrGf+2eULFrnNnu102TwcP03Y6IyAKuKUZ+cmUAL23al9A+ImtGmto6ZWO5kTUjB4PaoruxxBP/TYaESrxxZjMkw3XTpHByeE1IsAGQu7r+gvB2REQ25ZppmgyfF/505drL51X+rXj+1DFYenUxgFMx3IjIx/fPGof7ZwUUt4lEd+V4PQIqZqrvo6auMSkSKlrPx6y7Eo5K9ni84fguANmrW/YwF68Ska25phg52t6NYHu34jadIQnfm1wYs5HebReHnzNSVpyvGsvVso0atX1MC+QlVcM9Pa6JHhzZqDAwC7huBeDvcw39BeHP8zkjRGRzron2zn/xY9R80aS63bSzc/HEd8+N+QTW3rSsN9BjTYLcPmp3NeOGZzepfv3L8y+0VULF6nUayXrdNOETWInIJhjtlbHv8AnN2/lSPLh16pmK23k9guqblZZt1MjtI1kTKnpck0Qk63XTxONlfJeIkpJripFRQzLwZWObpu2s/u1dC7smVOyO142IyH5cU4z8+vqJKL5/nep2M8bl93taqB1TFnZMqCQDXjciIvtxzQLWwekpKMzJUNxm6GAffvT6Z0mRsrBbQiVZ8LoREdmPa4qRkCihs1t5rW7zsdjPD7FrysIuCZVkw+tGRGQvrpmmUeuPAqBft97e7No/paw4H9MCebZf42I3vG5ERPbhmmJEr3SEHVMWVidUkhWvGxGRPbhmmkavdARTFkRERPpyzZ0RLSkKQZCfqmHKwj6SIXpNRETauaYYiaQobl+5JebrEoAfTB2DZz7c3fNxBFMW9uGYBndERNTDNdM0WkwcNYQpCxtzVIM7IiLq4Zo7I5EGaXIEhKO7GxZfypSFDak1uIt8/6YF8vi9IiJKMq4pRtSivX2ju0xZ2Eu83z8iIkoerpmmcXSDNBfg94+IyLlcc2ekdyTXAxElnh3IxRE04XRsFosgnqzLcjPTHZfW0O18VFrUG3nd2OCOjOK0v+9Eycg1xUgk2ju+7UNUpK5AgdDS81q9lI2lXXPxt8yLcfhYZ1I0ytNKt/RJ3VqgejEQrD/1OX8BULYMCMwyPOXCBndkBKaziOxBkCTJPs1W+ggGg8jKykJrayv8fn/C+/v9yicx+6slAIDev/hEni3yVF4FHtn7jX5vdpFNky1RE0mfJHw+dWuB1XMBmT19WvobfPtPZxh+3SLnA8SOXifb94espdvfDyLqJ973b9esGQl1d2PKzkcBRBcivT++pvF/IEDs97V2bZSnRC19Amg8HzEUviMisycJQH7tUlOuGxvckV50+/tBRLpwzTTNjr+swzg09+8bf5JHAArQjBLPDmwSA/1eT7a0hm7pk70bo6dm+hAgIc/E68YGd6QHprOI7MU1xciJwwc0bZeLI4qvJ0taQ7f0ydGDmvZj5nVjgztKFNNZRPbimmmajCHDNW3XhNMVX0+WtIZu6ZPBwzTtxynXjdyB6Swie3FNMVJ0wRU4iBzZRniiBNRLOfhYLIr5uoDwKvtkSWtE0idykxeaz6dwcjg1I7MnCQIakYPNMtcNWo9DZCLd/n4QkS5cU4x4U1Kw4Wt3A+jfmTfy8Rt5CyHC0+8fqGRslBdpDAj0LyPiOh+PNxzfldmTAOB/v3Z3z3NaYpk1IT9prhu5g25/P4hIF64pRkKihEf3fwPlXYvQiOjfdhqRg/KuRXjpyAQ8ceNEx6Q1dEufBGYB160A/H229xcgdO2LeHT/NxS/fO1nDUwlkO0wnUVkH65
5zkjtrmbc8OwmAMpPYH15/oUoGZPtqLSGkU9grd19pOe6Knl5/oVcdEq2xCewEukv3vdv16Rpeq+KF+GJGUONbOe0tIZu5+PxAmOmRn2KqQRKdk77+06UjFwzTcPV88bgdSUiokS5phjh6nlj8LoSEVGiXFOMcPW8MXhdiYgoUa4pRgCunjcKrysRESXCNWma3rh63hi8rkREBDBNowlXzxuD15WIiAbCVdM0REREZD+uvDOiBz2mJDitQURExGJkQKq3N2DpW3VoaD31IK/8rHRUzAxoXqypxz6IiIicwLBpmj179uDWW2/FmDFjkJGRgbPOOgsVFRXo7Ow06pCmqN7egPKVW6KKCABobG1H+cotqN7eYMo+iIiInMKwYmTHjh0QRRFPP/00Pv/8c/z617/GU089hZ/85CdGHdJwIVHC0rfqECt+FPnc0rfqFJvC6bEPIiIiJzFsmqasrAxlZWU9H5955pn48ssvUVVVhUcffdSowxpq8+6WfnczepMANLS2Y/PuFtlUiR77ICIichJT14y0trYiO1v+seAdHR3o6Ojo+TgYDJoxLM30aArHxnJERETRTCtGdu7ciccff1zxrkhlZSWWLl1q+FjUUixyr8fbFC7WfthYjoiIKFrcxcg999yDZcuWKW7zxRdfoKioqOfjAwcOoKysDNdeey3mz58v+3VLlizBXXfd1fNxMBjEyJEj4x2iIrUUi9Lr0wJ5yM9KR2Nre8w1HwLCj0AvGZMtu597Z5yteR9ERERuEPfj4A8dOoTm5mbFbc4880z4fD4AQH19PS655BJceOGFeOGFF+DxaF8zq/fj4CMplr4nHLkn8oOLx+CZD3fLvl41ZxIAoHzlFgCI2i7WNmrHUdoH471ERJSs4n3/NrQ3zYEDB/Dv//7vOPfcc7Fy5Up4vd64vl7PYiQkSpiy7APFxaMeAZALsUTuWGxYfClq6hoV754oHSeyn3tnBPDAO3zOCBEROY9tetMcOHAAl1xyCQoLC/Hoo4/i0KFDPa/l5eUZdVhZaikWQL4QAaJTLmXF+ZgWyIu5rqR2V7OmtMyQ03zYsPhSPoGViIhcz7BipKamBjt37sTOnTsxYsSIqNesaBSsVzolsh+5pnDxpGXYWI6IiMjAh55973vfgyRJMf9YQa90itp+mJYhIiKKj2u69paMyUZ+VjqUJkE8AmRfFxBe06GWclE7jtb9EBERuYVrihGvR0DFzACA/gWHcPLP/KljZF8HgIqZAdU1HWrH0bofIiIit3BNMQIAZcX5qJozCXlZ0VMkeVnpqJozCUuuDCi+rjXlonYcpmWIiIhOMTTamyi9nzMSMdAnsOp9HCIiIieyTbTXztRSLHqlXJiWISIiUueqaRoiIiKyHxYjREREZCkWI0RERGQpFiNERERkKRYjREREZCkWI0RERGQpFiNERERkKRYjREREZCkWI0RERGQpWz+BNfKk+mAwaPFIiIiISKvI+7bWjjO2Lkba2toAACNHjrR4JERERBSvtrY2ZGVlqW5n60Z5oiiivr4emZmZEATnNpgLBoMYOXIk9u/fr2tDQLvjefO83cKt587zdu95Z2Zmoq2tDQUFBfB41FeE2PrOiMfjwYgRI6wehmn8fr+rfnAjeN7u4tbzBtx77jxvd4mct5Y7IhFcwEpERESWYjFCRERElmIxYgNpaWmoqKhAWlqa1UMxFc+b5+0Wbj13njfPWytbL2AlIiIi5+OdESIiIrIUixEiIiKyFIsRIiIishSLESIiIrIUixEiIiKyFIsRiz3xxBMYPXo00tPTccEFF2Dz5s1WD8lwH374IWbOnImCggIIgoA33njD6iGZorKyEueffz4yMzORm5uLa665Bl9++aXVwzJcVVUVxo8f3/NUxtLSUrz33ntWD8t0Dz/8MARBwKJFi6weiuHuv/9+CIIQ9aeoqMjqYZniwIEDmDNnDnJycpCRkYFzzjkHf/3rX60elqFGjx7d7/stCAIWLFi
geR8sRiz06quv4q677kJFRQW2bNmCCRMm4IorrkBTU5PVQzPUsWPHMGHCBDzxxBNWD8VU69evx4IFC7Bp0ybU1NSgq6sLl19+OY4dO2b10Aw1YsQIPPzww/jkk0/w17/+FZdeeimuvvpqfP7551YPzTQff/wxnn76aYwfP97qoZhm3LhxaGho6PmzYcMGq4dkuMOHD+Oiiy5Camoq3nvvPdTV1eGXv/wlhgwZYvXQDPXxxx9Hfa9ramoAANdee632nUhkmZKSEmnBggU9H4dCIamgoECqrKy0cFTmAiCtWbPG6mFYoqmpSQIgrV+/3uqhmG7IkCHSb3/7W6uHYYq2tjZp7NixUk1NjfRv//Zv0p133mn1kAxXUVEhTZgwwephmG7x4sXSlClTrB6G5e68807prLPOkkRR1Pw1vDNikc7OTnzyySe47LLLej7n8Xhw2WWXoba21sKRkVlaW1sBANnZ2RaPxDyhUAivvPIKjh07htLSUquHY4oFCxZgxowZUX/X3eCrr75CQUEBzjzzTHz3u9/Fvn37rB6S4dauXYvzzjsP1157LXJzczFx4kQ8++yzVg/LVJ2dnVi5ciVuueUWCIKg+etYjFjkX//6F0KhEIYNGxb1+WHDhqGxsdGiUZFZRFHEokWLcNFFF6G4uNjq4Rhu27ZtGDx4MNLS0nD77bdjzZo1CAQCVg/LcK+88gq2bNmCyspKq4diqgsuuAAvvPACqqurUVVVhd27d2Pq1Kloa2uzemiG+sc//oGqqiqMHTsW69atQ3l5OX74wx/ixRdftHpopnnjjTdw5MgRfO9734vr61KMGQ4RKVmwYAG2b9/uinl0APjGN76BrVu3orW1Fa+//jrmzZuH9evXO7og2b9/P+68807U1NQgPT3d6uGYavr06T3/f/z48bjgggtQWFiI1atX49Zbb7VwZMYSRRHnnXceHnroIQDAxIkTsX37djz11FOYN2+exaMzx3PPPYfp06ejoKAgrq/jnRGLnHHGGfB6vTh48GDU5w8ePIi8vDyLRkVmWLhwId5++2386U9/wogRI6wejil8Ph++9rWv4dxzz0VlZSUmTJiA3/zmN1YPy1CffPIJmpqaMGnSJKSkpCAlJQXr16/HY489hpSUFIRCIauHaJrTTz8dX//617Fz506rh2Ko/Pz8fgX22Wef7YopKgDYu3cv/vjHP+L73/9+3F/LYsQiPp8P5557Lt5///2ez4miiPfff981c+luI0kSFi5ciDVr1uCDDz7AmDFjrB6SZURRREdHh9XDMNS3vvUtbNu2DVu3bu35c9555+G73/0utm7dCq/Xa/UQTXP06FHs2rUL+fn5Vg/FUBdddFG/uP7f//53FBYWWjQicz3//PPIzc3FjBkz4v5aTtNY6K677sK8efNw3nnnoaSkBMuXL8exY8dw8803Wz00Qx09ejTqN6Tdu3dj69atyM7OxqhRoywcmbEWLFiAVatW4c0330RmZmbP2qCsrCxkZGRYPDrjLFmyBNOnT8eoUaPQ1taGVatW4c9//jPWrVtn9dAMlZmZ2W890GmnnYacnBzHrxO6++67MXPmTBQWFqK+vh4VFRXwer244YYbrB6aoX70ox9h8uTJeOihh3Dddddh8+bNeOaZZ/DMM89YPTTDiaKI559/HvPmzUNKygBKC+PCPaTF448/Lo0aNUry+XxSSUmJtGnTJquHZLg//elPEoB+f+bNm2f10AwV65wBSM8//7zVQzPULbfcIhUWFko+n08aOnSo9K1vfUv6wx/+YPWwLOGWaO/1118v5efnSz6fTxo+fLh0/fXXSzt37rR6WKZ46623pOLiYiktLU0qKiqSnnnmGauHZIp169ZJAKQvv/xyQF8vSJIk6VMXEREREcWPa0aIiIjIUixGiIiIyFIsRoiIiMhSLEaIiIjIUixGiIiIyFIsRoiIiMhSLEaIiIjIUixGiIiIyFIsRoiIiMhSLEaIiIjIUixGiIiIyFL
/D5nm2vHYVXmpAAAAAElFTkSuQmCC",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.scatter(X[y==0, 0], X[y==0, 1])\n",
+    "plt.scatter(X[y==1, 0], X[y==1, 1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.neighbors import NearestNeighbors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_min = X[y == 1]\n",
+    "X_min = StandardScaler().fit_transform(X_min)\n",
+    "indices = NearestNeighbors(n_neighbors=5).fit(X_min).kneighbors(X_min, return_distance=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def joint_neighborhood_ratios(X, y):\n",
+    "\n",
+    "    X_min = X[y == 1]\n",
+    "    X_min = StandardScaler().fit_transform(X_min)\n",
+    "    indices = NearestNeighbors(n_neighbors=5).fit(X_min).kneighbors(X_min, return_distance=False)\n",
+    "    count = 0\n",
+    "    count_none = 0\n",
+    "    for idx, inds in enumerate(indices):\n",
+    "        flag = False\n",
+    "        for ind in inds[1:]:\n",
+    "            if idx in indices[ind]:\n",
+    "                count += 1\n",
+    "                flag = True\n",
+    "        if not flag:\n",
+    "            count_none += 1\n",
+    "    return count/len(X_min), count_none/len(X_min)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def comparison(X, y):\n",
+    "    X = StandardScaler().fit_transform(X)\n",
+    "    X_min = X[y == 1]\n",
+    "    X_maj = X[y == 0]\n",
+    "\n",
+    "    indices = NearestNeighbors(n_neighbors=5).fit(X_min).kneighbors(X_min, return_distance=False)\n",
+    "    all_points = (X_min[:, None] + X_min[indices[:, 1:]]) / 2\n",
+    "    all_points = all_points.reshape((all_points.shape[0]*all_points.shape[1], all_points.shape[2]))\n",
+    "\n",
+    "    indices = NearestNeighbors(n_neighbors=5).fit(X).kneighbors(all_points, return_distance=False)\n",
+    "    return np.bincount(y[indices.ravel()])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([776, 844])"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "comparison(X[:, :], y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "logger = logging.getLogger('smote_variants')\n",
+    "logger.setLevel(logging.ERROR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class RandomForestClassifierImproved:\n",
+    "    def __init__(self, smote, min_samples_leaf=5, random_state=None, bootstrap=True, n_estimators=200):\n",
+    "        self.smote = smote\n",
+    "        self.min_samples_leaf = min_samples_leaf\n",
+    "        self.random_state = random_state\n",
+    "\n",
+    "    def fit(self, X, y, sample_weight=None):\n",
+    "        self.estimators = []\n",
+    "        self.masks = []\n",
+    "        for idx in range(1000):\n",
+    "            estimator = OperatorDecisionTreeClassifier(max_features='sqrt', min_weight_fraction_leaf=2/len(X), mode='avg')\n",
+    "            bootstrap = np.random.randint(0, X.shape[0], X.shape[0])\n",
+    "            X_train = X[bootstrap]\n",
+    "            y_train = y[bootstrap]\n",
+    "\n",
+    "            \"\"\"X0 = X_train[y_train == 0]\n",
+    "            X1 = np.unique(X_train[y_train == 1], axis=0)\n",
+    "            #X1 = X[y == 1]\n",
+    "\n",
+    "            #bootstrap = np.random.choice(np.arange(X1.shape[0]), X1.shape[0], replace=True)\n",
+    "            #X1 = X1[bootstrap]\n",
+    "\n",
+    "            X_train = np.vstack([X0, X1])\n",
+    "            y_train = np.hstack([np.repeat(0, X0.shape[0]), np.repeat(1, X1.shape[0])])\"\"\"\n",
+    "\n",
+    "            smote = SMOTE(\n",
+    "                random_state=5,\n",
+    "                n_neighbors=1,\n",
+    "                #proportion=2.0,\n",
+    "                #proportion=1.0 + (np.random.random_sample()-1)*0.5,\n",
+    "                ss_params={'gaussian_component': {'sigma': 0.0, 'fraction': 1.0}, 'n_dim': 2},\n",
+    "                nn_params={'metric_learning_method': 'n_unique_inv', 'metric': 'precomputed', 'random_state': 5}\n",
+    "            )\n",
+    "            #X_train = X_train + np.random.random_sample(X_train.shape)\n",
+    "\n",
+    "            X_train, y_train = smote.sample(X_train, y_train)\n",
+    "            \"\"\"X0 = X_train[y_train == 0]\n",
+    "            X1 = X_train[y_train == 1]\n",
+    "            X_train = np.vstack([X0, X1, X1])\n",
+    "            y_train = np.hstack([np.repeat(0, X0.shape[0]), np.repeat(1, X1.shape[0]*2)])\"\"\"\n",
+    "            sample_weight = np.hstack([np.repeat(1.0, X.shape[0]), np.repeat(0.5, X_train.shape[0] - X.shape[0])])\n",
+    "            estimator.fit(X_train, y_train, sample_weight=sample_weight)\n",
+    "            self.estimators.append(estimator)\n",
+    "        return self\n",
+    "\n",
+    "    def predict_proba(self, X):\n",
+    "        return np.mean([estimator.predict_proba(X) for estimator in self.estimators], axis=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "306 283\n",
+      "[[ 2. 38. 59.]\n",
+      " [ 4. 39. 63.]\n",
+      " [ 1. 49. 62.]]\n",
+      "[31, 49, 12]\n",
+      "[[ 0. 45. 66.]\n",
+      " [ 1. 63. 60.]\n",
+      " [ 8. 69. 67.]\n",
+      " [ 5. 61. 62.]\n",
+      " [ 4. 53. 58.]]\n",
+      "[225  81]\n",
+      "(2.8395061728395063, 0.09876543209876543)\n",
+      "[772 848]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.603921568627451 0.6261437908496732 0.5986928104575164 0.6366013071895424 0.6431372549019607\n",
+      "0.7048499702911467 0.6802473262032085 0.7060606060606062 0.6983437314319668 0.6980912061794414\n",
+      "0.7140153283535637 0.6938920012449424 0.7196992685963274 0.714513305322129 0.7132878151260504\n",
+      "0.7095601412608056 0.6882827324478178 0.7127358739194604 0.7096787370862323 0.7104298439806029\n",
+      "0.7152807667782561 0.6921907380838515 0.718140243902439 0.7166596923322175 0.7168121313566077\n",
+      "0.7145577021658337 0.6937107522747661 0.7161163975394079 0.7154820261437908 0.7155180699730873\n",
+      "0.7156635058394943 0.6917838583520841 0.7169358727097397 0.7166552823315118 0.7171568627450982\n",
+      "0.7167403111479335 0.6933558869557213 0.7181556660222777 0.7167011875172605 0.7172713568995674\n",
+      "0.7176147825385298 0.6952004155571694 0.7183490680222706 0.718486242233519 0.7190576333414024\n",
+      "0.7159538174244058 0.6963720103425987 0.7170500251382607 0.7179047259929612 0.7188595309918838\n",
+      "0.7169400763605773 0.6975158545266289 0.718616935222934 0.7197368633922214 0.7209190772018379\n",
+      "0.7176750279691456 0.6972615998351293 0.7192898781134074 0.7188335394217749 0.7200505652711533\n",
+      "0.7181578485388647 0.6974392994112245 0.7200254550856154 0.7189937476367958 0.7202823691460055\n",
+      "0.7182694706381281 0.6967959512049094 0.7200749638277703 0.718701666417203 0.7203041585590979\n",
+      "0.7178423260557178 0.6960790108005377 0.7195139758030873 0.7177704769851204 0.7193691188059147\n",
+      "0.7194279422585812 0.6972403475739082 0.7211566138596719 0.7191076375362507 0.7207600203436783\n",
+      "0.7183610400682012 0.6962301587301588 0.7205182072829132 0.7178206065034709 0.7194951893800999\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[13], line 91\u001b[0m\n\u001b[1;32m     88\u001b[0m res0\u001b[38;5;241m.\u001b[39mappend(roc_auc_score(y_test, pred))\n\u001b[1;32m     90\u001b[0m classifier \u001b[38;5;241m=\u001b[39m RandomForestClassifierImproved(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(rfs_params \u001b[38;5;241m|\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmin_samples_leaf\u001b[39m\u001b[38;5;124m'\u001b[39m: msl}), smote\u001b[38;5;241m=\u001b[39msmote3)\n\u001b[0;32m---> 91\u001b[0m \u001b[43mclassifier\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     92\u001b[0m pred \u001b[38;5;241m=\u001b[39m classifier\u001b[38;5;241m.\u001b[39mpredict_proba(X_test)[:, \u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m     93\u001b[0m res1\u001b[38;5;241m.\u001b[39mappend(roc_auc_score(y_test, pred))\n",
+      "Cell \u001b[0;32mIn[12], line 36\u001b[0m, in \u001b[0;36mRandomForestClassifierImproved.fit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m     26\u001b[0m smote \u001b[38;5;241m=\u001b[39m SMOTE(\n\u001b[1;32m     27\u001b[0m     random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m,\n\u001b[1;32m     28\u001b[0m     n_neighbors\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     32\u001b[0m     nn_params\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmetric_learning_method\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mn_unique_inv\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmetric\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mprecomputed\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrandom_state\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m5\u001b[39m}\n\u001b[1;32m     33\u001b[0m )\n\u001b[1;32m     34\u001b[0m \u001b[38;5;66;03m#X_train = X_train + np.random.random_sample(X_train.shape)\u001b[39;00m\n\u001b[0;32m---> 36\u001b[0m X_train, y_train \u001b[38;5;241m=\u001b[39m \u001b[43msmote\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     37\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"X0 = X_train[y_train == 0]\u001b[39;00m\n\u001b[1;32m     38\u001b[0m \u001b[38;5;124;03mX1 = X_train[y_train == 1]\u001b[39;00m\n\u001b[1;32m     39\u001b[0m \u001b[38;5;124;03mX_train = np.vstack([X0, X1, X1])\u001b[39;00m\n\u001b[1;32m     40\u001b[0m \u001b[38;5;124;03my_train = np.hstack([np.repeat(0, X0.shape[0]), np.repeat(1, X1.shape[0]*2)])\"\"\"\u001b[39;00m\n\u001b[1;32m     41\u001b[0m sample_weight 
\u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mhstack([np\u001b[38;5;241m.\u001b[39mrepeat(\u001b[38;5;241m1.0\u001b[39m, X\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]), np\u001b[38;5;241m.\u001b[39mrepeat(\u001b[38;5;241m0.5\u001b[39m, X_train\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m-\u001b[39m X\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m])])\n",
+      "File \u001b[0;32m~/workspaces/smote_variants/smote_variants/base/_oversampling.py:234\u001b[0m, in \u001b[0;36mOverSamplingBase.sample\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m    230\u001b[0m             _logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m: not enough dimensions \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    231\u001b[0m                         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, X\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m])\n\u001b[1;32m    232\u001b[0m             \u001b[38;5;28;01mreturn\u001b[39;00m X\u001b[38;5;241m.\u001b[39mcopy(), y\u001b[38;5;241m.\u001b[39mcopy()\n\u001b[0;32m--> 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msampling_algorithm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/workspaces/smote_variants/smote_variants/oversampling/_smote.py:122\u001b[0m, in \u001b[0;36mSMOTE.sampling_algorithm\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m    118\u001b[0m nn_mt\u001b[38;5;241m=\u001b[39m NearestNeighborsWithMetricTensor(n_neighbors\u001b[38;5;241m=\u001b[39mn_neighbors,\n\u001b[1;32m    119\u001b[0m                                         n_jobs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_jobs,\n\u001b[1;32m    120\u001b[0m                                         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mnn_params)\n\u001b[1;32m    121\u001b[0m nn_mt\u001b[38;5;241m.\u001b[39mfit(X_min)\n\u001b[0;32m--> 122\u001b[0m _, ind_min \u001b[38;5;241m=\u001b[39m \u001b[43mnn_mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkneighbors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_min\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreturn_distance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m    124\u001b[0m samples \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msample_simplex(X\u001b[38;5;241m=\u001b[39mX_min,\n\u001b[1;32m    125\u001b[0m                                 indices\u001b[38;5;241m=\u001b[39mind_min,\n\u001b[1;32m    126\u001b[0m                                 n_to_sample\u001b[38;5;241m=\u001b[39mn_to_sample)\n\u001b[1;32m    128\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m (np\u001b[38;5;241m.\u001b[39mvstack([X, samples]),\n\u001b[1;32m    129\u001b[0m         np\u001b[38;5;241m.\u001b[39mhstack([y, np\u001b[38;5;241m.\u001b[39mhstack([\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmin_label]\u001b[38;5;241m*\u001b[39mn_to_sample)]))\n",
+      "File \u001b[0;32m~/workspaces/smote_variants/smote_variants/base/_metrictensor.py:769\u001b[0m, in \u001b[0;36mNearestNeighborsWithMetricTensor.kneighbors\u001b[0;34m(self, X, n_neighbors, return_distance)\u001b[0m\n\u001b[1;32m    766\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnearestn\u001b[38;5;241m.\u001b[39mkneighbors(X, n_neighbors, return_distance)\n\u001b[1;32m    768\u001b[0m n_neighbors \u001b[38;5;241m=\u001b[39m coalesce(n_neighbors, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_neighbors)\n\u001b[0;32m--> 769\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mn_neighbors_func\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mX_fitted\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    770\u001b[0m \u001b[43m                    \u001b[49m\u001b[43mmetric_tensor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmetric_tensor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    771\u001b[0m \u001b[43m                    \u001b[49m\u001b[43mn_neighbors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_neighbors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    772\u001b[0m \u001b[43m                    \u001b[49m\u001b[43mreturn_distance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_distance\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/workspaces/smote_variants/smote_variants/base/_metrictensor.py:485\u001b[0m, in \u001b[0;36mn_neighbors_func\u001b[0;34m(X_base, X_neighbors, n_neighbors, metric_tensor, return_distance)\u001b[0m\n\u001b[1;32m    479\u001b[0m X_neighbors\u001b[38;5;241m=\u001b[39m X_neighbors \u001b[38;5;28;01mif\u001b[39;00m X_neighbors \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m X_base\n\u001b[1;32m    481\u001b[0m distm \u001b[38;5;241m=\u001b[39m pairwise_distances_mahalanobis(X_base,\n\u001b[1;32m    482\u001b[0m                                         Y\u001b[38;5;241m=\u001b[39mX_neighbors,\n\u001b[1;32m    483\u001b[0m                                         tensor\u001b[38;5;241m=\u001b[39mmetric_tensor)\n\u001b[0;32m--> 485\u001b[0m results_ind\u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_along_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margsort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    486\u001b[0m \u001b[43m                                    \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    487\u001b[0m \u001b[43m                                    \u001b[49m\u001b[43marr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdistm\u001b[49m\u001b[43m)\u001b[49m[:,:(n_neighbors)]\n\u001b[1;32m    489\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m return_distance:\n\u001b[1;32m    490\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m results_ind\n",
+      "File \u001b[0;32m~/anaconda3/envs/smote-variants/lib/python3.10/site-packages/numpy/lib/shape_base.py:402\u001b[0m, in \u001b[0;36mapply_along_axis\u001b[0;34m(func1d, axis, arr, *args, **kwargs)\u001b[0m\n\u001b[1;32m    400\u001b[0m buff[ind0] \u001b[38;5;241m=\u001b[39m res\n\u001b[1;32m    401\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ind \u001b[38;5;129;01min\u001b[39;00m inds:\n\u001b[0;32m--> 402\u001b[0m     buff[ind] \u001b[38;5;241m=\u001b[39m asanyarray(func1d(inarr_view[ind], \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs))\n\u001b[1;32m    404\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(res, matrix):\n\u001b[1;32m    405\u001b[0m     \u001b[38;5;66;03m# wrap the array, to preserve subclasses\u001b[39;00m\n\u001b[1;32m    406\u001b[0m     buff \u001b[38;5;241m=\u001b[39m res\u001b[38;5;241m.\u001b[39m__array_wrap__(buff)\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "dataset = binclas.load_yeast_1_vs_7()\n",
+    "dataset = binclas.load_saheart()\n",
+    "dataset = binclas.load_haberman()\n",
+    "X = dataset['data']\n",
+    "y = dataset['target']\n",
+    "\n",
+    "print(len(X), len(np.unique(X, axis=0)))\n",
+    "print(X[:3])\n",
+    "print([len(np.unique(X[:, idx])) for idx in range(X.shape[1])])\n",
+    "print(X[y == 1][:5])\n",
+    "print(np.bincount(y))\n",
+    "print(joint_neighborhood_ratios(X, y))\n",
+    "print(comparison(X, y))\n",
+    "\n",
+    "validator = RepeatedStratifiedKFold(n_splits=5, n_repeats=100, random_state=5)\n",
+    "\n",
+    "res0 = []\n",
+    "res1 = []\n",
+    "res2 = []\n",
+    "res3 = []\n",
+    "res4 = []\n",
+    "\n",
+    "#X = X + np.random.random_sample(size=X.shape) * 0.001\n",
+    "\n",
+    "for idx, (train, test) in enumerate(validator.split(X, y, y)):\n",
+    "\n",
+    "    X_train = X[train]\n",
+    "    X_test = X[test]\n",
+    "    y_train = y[train]\n",
+    "    y_test = y[test]\n",
+    "\n",
+    "\n",
+    "\n",
+    "    \"\"\"pca = PCA(n_components=X_train.shape[1]).fit(X_train)\n",
+    "    X_train = pca.transform(X_train)\n",
+    "    X_test = pca.transform(X_test)\"\"\"\n",
+    "\n",
+    "    ss = StandardScaler()\n",
+    "    X_train = ss.fit_transform(X_train)\n",
+    "    X_test = ss.transform(X_test)\n",
+    "\n",
+    "    X1_mean = np.mean(X_train[y_train == 1], axis=0)\n",
+    "    dists = np.sum((X_train[y_train==1] - X1_mean)**2, axis=1)\n",
+    "    mask = dists < np.sort(dists)[-5]\n",
+    "    X0 = X_train[y_train == 0]\n",
+    "    X1 = X_train[y_train == 1][mask]\n",
+    "    X_train = np.vstack([X0, X1])\n",
+    "    y_train = np.hstack([np.repeat(0, X0.shape[0]), np.repeat(1, X1.shape[0])])\n",
+    "\n",
+    "    smote0 = SMOTE(random_state=5)\n",
+    "    smote1 = SMOTE(\n",
+    "        random_state=5,\n",
+    "        n_neighbors=5,\n",
+    "        #proportion=2.0,\n",
+    "        ss_params={'gaussian_component': {'sigma': 0.0, 'fraction': 1.0}, 'n_dim': 2},\n",
+    "        #nn_params={'metric_learning_method': 'MI_weighted', 'metric': 'precomputed', 'random_state': 5}\n",
+    "    )\n",
+    "    smote2 = SMOTE(\n",
+    "        random_state=5,\n",
+    "        #n_neighbors=1,\n",
+    "        #proportion=2.0,\n",
+    "        ss_params={'gaussian_component': {'sigma': 0.01, 'fraction': 1.0}, 'n_dim': 2},\n",
+    "        #nn_params={'metric_learning_method': 'id', 'metric': 'precomputed', 'random_state': 5}\n",
+    "    )\n",
+    "    smote3 = SMOTE(\n",
+    "        random_state=5,\n",
+    "        #n_neighbors=1,\n",
+    "        #proportion=2.0,\n",
+    "        ss_params={'gaussian_component': {'sigma': 0.0, 'fraction': 1.0}, 'n_dim': 2},\n",
+    "        #nn_params={'metric_learning_method': 'n_unique_inv', 'metric': 'precomputed', 'random_state': 5}\n",
+    "    )\n",
+    "\n",
+    "    X_samp0, y_samp0 = smote0.sample(X_train, y_train)\n",
+    "    X_samp1, y_samp1 = smote1.sample(X_train, y_train)\n",
+    "    X_samp2, y_samp2 = smote2.sample(X_train, y_train)\n",
+    "    X_samp3, y_samp3 = smote3.sample(X_train, y_train)\n",
+    "\n",
+    "    sample_weight=np.hstack([np.repeat(1.0, X_train.shape[0]), np.repeat(0.5, X_samp1.shape[0] - X_train.shape[0])])\n",
+    "\n",
+    "    msl = 13\n",
+    "\n",
+    "    rf_params = {'min_samples_leaf': 13, 'random_state': 5, 'bootstrap': True, 'n_estimators': 1000}\n",
+    "    rfs_params = {'min_samples_leaf': msl, 'random_state': 6, 'bootstrap': True, 'n_estimators': 1000}\n",
+    "\n",
+    "    classifier = OperatorRandomForestClassifier(**(rfs_params | {'bootstrap': True, 'min_weight_fraction_leaf': 2.0/len(X_samp0)}), mode='avg_half')\n",
+    "    classifier.fit(X_samp0, y_samp0)\n",
+    "    pred = classifier.predict_proba(X_test)[:, 1]\n",
+    "    res0.append(roc_auc_score(y_test, pred))\n",
+    "\n",
+    "    classifier = RandomForestClassifierImproved(**(rfs_params | {'min_samples_leaf': msl}), smote=smote3)\n",
+    "    classifier.fit(X_train, y_train)\n",
+    "    pred = classifier.predict_proba(X_test)[:, 1]\n",
+    "    res1.append(roc_auc_score(y_test, pred))\n",
+    "\n",
+    "    classifier = OperatorRandomForestClassifier(**rfs_params, mode='avg_half')\n",
+    "    classifier.fit(X_samp2, y_samp2)\n",
+    "    pred = classifier.predict_proba(X_test)[:, 1]\n",
+    "    res2.append(roc_auc_score(y_test, pred))\n",
+    "\n",
+    "    \"\"\"classifier = RandomForestClassifier(**rfs_params)\n",
+    "    classifier.fit(X_samp3, y_samp3, sample_weight=sample_weight)\n",
+    "    pred = classifier.predict_proba(X_test)[:, 1]\n",
+    "    res3.append(roc_auc_score(y_test, pred))\"\"\"\n",
+    "\n",
+    "\n",
+    "    classifier = OperatorRandomForestClassifier(**rfs_params#, class_weight={0: 1.0, 1: np.sum(1 - y_train)/np.sum(y_train)}\n",
+    "                                        ,mode='avg_half')\n",
+    "    classifier.fit(X_train, y_train)\n",
+    "    pred = classifier.predict_proba(X_test)[:, 1]\n",
+    "    res3.append(roc_auc_score(y_test, pred))\n",
+    "\n",
+    "\n",
+    "    classifier = OperatorRandomForestClassifier(**(rf_params | {'bootstrap': True}), mode='avg_half')\n",
+    "    classifier.fit(X_train, y_train)\n",
+    "    pred = classifier.predict_proba(X_test)[:, 1]\n",
+    "    res4.append(roc_auc_score(y_test, pred))\n",
+    "\n",
+    "    if idx % 10 == 0:\n",
+    "        print(np.mean(res0), np.mean(res1), np.mean(res2), np.mean(res3), np.mean(res4))\n",
+    "    #break\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.9252715517241379,\n",
+       " 0.9300951794510908,\n",
+       " 0.9287995249824067,\n",
+       " 0.9308575826882477,\n",
+       " 0.9308951442646023)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.mean(res0), np.mean(res1), np.mean(res2), np.mean(res3), np.mean(res4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.9285714285714286,\n",
+       " 0.935960591133005,\n",
+       " 0.9334975369458128,\n",
+       " 0.93481703026038,\n",
+       " 0.9359605911330049)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.median(res0), np.median(res1), np.median(res2), np.median(res3), np.median(res4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smote-variants",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/development/033-analysis-ml.ipynb b/notebooks/development/033-analysis-ml.ipynb
new file mode 100644
index 0000000..684fc56
--- /dev/null
+++ b/notebooks/development/033-analysis-ml.ipynb
@@ -0,0 +1,1304 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy.stats import wilcoxon\n",
+    "import common_datasets.binary_classification as binclas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('bupa-ml.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>name</th>\n",
+       "      <th>fold</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>0.737500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 6, 'random_state': 5}</td>\n",
+       "      <td>0.675431</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>0.681034</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>0.859483</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 6, 'random_state': 5}</td>\n",
+       "      <td>0.858621</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Unnamed: 0  name  fold                                             sparam  \\\n",
+       "0           0  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "1           1  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "2           2  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "3           3  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "4           4  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "               classifier                               cparam       auc  \n",
+       "0  DecisionTreeClassifier  {'max_depth': 4, 'random_state': 5}  0.737500  \n",
+       "1  DecisionTreeClassifier  {'max_depth': 6, 'random_state': 5}  0.675431  \n",
+       "2  DecisionTreeClassifier  {'max_depth': 8, 'random_state': 5}  0.681034  \n",
+       "3  RandomForestClassifier  {'max_depth': 4, 'random_state': 5}  0.859483  \n",
+       "4  RandomForestClassifier  {'max_depth': 6, 'random_state': 5}  0.858621  "
+      ]
+     },
+     "execution_count": 63,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data['sparam'] = data['sparam'].apply(eval)\n",
+    "data['cparam'] = data['cparam'].apply(eval)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def remove_key(dict, key):\n",
+    "    del dict[key]\n",
+    "    return dict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data['metric'] = data['sparam'].apply(lambda x: x['nn_params']['metric_learning_method'])\n",
+    "data['sparam'] = data['sparam'].apply(lambda x: remove_key(x, 'nn_params'))\n",
+    "data['cparam'] = data['cparam'].apply(str)\n",
+    "data['sparam'] = data['sparam'].apply(str)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Unnamed: 0', 'name', 'fold', 'sparam', 'classifier', 'cparam', 'auc',\n",
+       "       'metric'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>name</th>\n",
+       "      <th>fold</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>metric</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>0.737500</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 6, 'random_state': 5}</td>\n",
+       "      <td>0.675431</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>0.681034</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>0.859483</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 6, 'random_state': 5}</td>\n",
+       "      <td>0.858621</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Unnamed: 0  name  fold                                             sparam  \\\n",
+       "0           0  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "1           1  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "2           2  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "3           3  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "4           4  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "               classifier                               cparam       auc  \\\n",
+       "0  DecisionTreeClassifier  {'max_depth': 4, 'random_state': 5}  0.737500   \n",
+       "1  DecisionTreeClassifier  {'max_depth': 6, 'random_state': 5}  0.675431   \n",
+       "2  DecisionTreeClassifier  {'max_depth': 8, 'random_state': 5}  0.681034   \n",
+       "3  RandomForestClassifier  {'max_depth': 4, 'random_state': 5}  0.859483   \n",
+       "4  RandomForestClassifier  {'max_depth': 6, 'random_state': 5}  0.858621   \n",
+       "\n",
+       "  metric  \n",
+       "0     id  \n",
+       "1     id  \n",
+       "2     id  \n",
+       "3     id  \n",
+       "4     id  "
+      ]
+     },
+     "execution_count": 68,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grouped = data.groupby(['name', 'sparam', 'classifier', 'cparam', 'metric']).apply(lambda pdf: pdf.sort_values('fold')['auc'].values.tolist())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grouped = grouped.reset_index(drop=False)\n",
+    "grouped = grouped.rename(columns={0: 'auc'})\n",
+    "determ = grouped[grouped['metric'] == 'MI_weighted'].drop(columns=['metric'])\n",
+    "rand = grouped[grouped['metric'] == 'id'].drop(columns=['metric'])\n",
+    "merged = pd.merge(determ.rename(columns={'auc': 'auc_det'}), rand, on=['name', 'sparam', 'classifier', 'cparam'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "merged['auc_mean_det'] = merged['auc_det'].apply(np.mean)\n",
+    "merged['auc_std_det'] = merged['auc_det'].apply(np.std)\n",
+    "merged['auc_min_det'] = merged['auc_det'].apply(np.min)\n",
+    "merged['auc_max_det'] = merged['auc_det'].apply(np.max)\n",
+    "merged['auc_mean'] = merged['auc'].apply(np.mean)\n",
+    "merged['auc_std'] = merged['auc'].apply(np.std)\n",
+    "merged['auc_min'] = merged['auc'].apply(np.min)\n",
+    "merged['auc_max'] = merged['auc'].apply(np.max)\n",
+    "merged['p_l'] = merged.apply(lambda row: wilcoxon(row['auc_det'], row['auc'], zero_method='zsplit', alternative='less').pvalue, axis=1)\n",
+    "merged['p_g'] = merged.apply(lambda row: wilcoxon(row['auc_det'], row['auc'], zero_method='zsplit', alternative='greater').pvalue, axis=1)\n",
+    "merged['f_l'] = merged['p_l'] < 0.05\n",
+    "merged['f_g'] = merged['p_g'] < 0.05"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def model_selection(pdf):\n",
+    "    max_det = pdf[pdf['auc_mean_det'] == pdf['auc_mean_det'].max()].iloc[0]\n",
+    "    max_ran = pdf[pdf['auc_mean'] == pdf['auc_mean'].max()].iloc[0]\n",
+    "    return pd.Series({'auc_mean_det': max_det['auc_mean_det'],\n",
+    "            'auc_mean': max_ran['auc_mean'],\n",
+    "            'auc_std_det': max_det['auc_std_det'],\n",
+    "            'auc_std': max_ran['auc_std'],\n",
+    "            'p_l': wilcoxon(max_det['auc_det'], max_ran['auc'], zero_method='zsplit', alternative='less').pvalue,\n",
+    "            'p_g': wilcoxon(max_det['auc_det'], max_ran['auc'], zero_method='zsplit', alternative='greater').pvalue,\n",
+    "            'auc_median_det': np.median(max_det['auc_det']),\n",
+    "            'auc_median': np.median(max_ran['auc'])})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>auc_median_det</th>\n",
+       "      <th>auc_median</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>classifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>DecisionTreeClassifier</th>\n",
+       "      <td>0.661172</td>\n",
+       "      <td>0.661109</td>\n",
+       "      <td>0.060384</td>\n",
+       "      <td>0.059722</td>\n",
+       "      <td>0.443313</td>\n",
+       "      <td>0.556687</td>\n",
+       "      <td>0.662931</td>\n",
+       "      <td>0.664224</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>KNeighborsClassifier</th>\n",
+       "      <td>0.659195</td>\n",
+       "      <td>0.658767</td>\n",
+       "      <td>0.055252</td>\n",
+       "      <td>0.055262</td>\n",
+       "      <td>0.665874</td>\n",
+       "      <td>0.334126</td>\n",
+       "      <td>0.661207</td>\n",
+       "      <td>0.657543</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>RandomForestClassifier</th>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>0.765517</td>\n",
+       "      <td>0.766810</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SVC</th>\n",
+       "      <td>0.650130</td>\n",
+       "      <td>0.650833</td>\n",
+       "      <td>0.065829</td>\n",
+       "      <td>0.065979</td>\n",
+       "      <td>0.001734</td>\n",
+       "      <td>0.998266</td>\n",
+       "      <td>0.651724</td>\n",
+       "      <td>0.652155</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        auc_mean_det  auc_mean  auc_std_det   auc_std  \\\n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier      0.661172  0.661109     0.060384  0.059722   \n",
+       "KNeighborsClassifier        0.659195  0.658767     0.055252  0.055262   \n",
+       "RandomForestClassifier      0.762595  0.764598     0.052691  0.052473   \n",
+       "SVC                         0.650130  0.650833     0.065829  0.065979   \n",
+       "\n",
+       "                             p_l       p_g  auc_median_det  auc_median  \n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier  0.443313  0.556687        0.662931    0.664224  \n",
+       "KNeighborsClassifier    0.665874  0.334126        0.661207    0.657543  \n",
+       "RandomForestClassifier  0.002484  0.997516        0.765517    0.766810  \n",
+       "SVC                     0.001734  0.998266        0.651724    0.652155  "
+      ]
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged.groupby('classifier').apply(model_selection)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>classifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>DecisionTreeClassifier</th>\n",
+       "      <th>0</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>[0.6857758620689656, 0.5900862068965518, 0.638...</td>\n",
+       "      <td>[0.7375, 0.6668103448275862, 0.7, 0.6504310344...</td>\n",
+       "      <td>0.661172</td>\n",
+       "      <td>0.060384</td>\n",
+       "      <td>0.458190</td>\n",
+       "      <td>0.855603</td>\n",
+       "      <td>0.661109</td>\n",
+       "      <td>0.059722</td>\n",
+       "      <td>0.475000</td>\n",
+       "      <td>0.833621</td>\n",
+       "      <td>0.443313</td>\n",
+       "      <td>0.556687</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>KNeighborsClassifier</th>\n",
+       "      <th>4</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>{'n_neighbors': 3}</td>\n",
+       "      <td>[0.6133620689655173, 0.6340517241379311, 0.630...</td>\n",
+       "      <td>[0.6051724137931035, 0.6189655172413793, 0.637...</td>\n",
+       "      <td>0.659195</td>\n",
+       "      <td>0.055252</td>\n",
+       "      <td>0.482759</td>\n",
+       "      <td>0.853448</td>\n",
+       "      <td>0.658767</td>\n",
+       "      <td>0.055262</td>\n",
+       "      <td>0.492241</td>\n",
+       "      <td>0.853448</td>\n",
+       "      <td>0.665874</td>\n",
+       "      <td>0.334126</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>RandomForestClassifier</th>\n",
+       "      <th>10</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>[0.8543103448275862, 0.7698275862068966, 0.755...</td>\n",
+       "      <td>[0.8551724137931035, 0.7422413793103448, 0.755...</td>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.603448</td>\n",
+       "      <td>0.928448</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.606034</td>\n",
+       "      <td>0.923276</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SVC</th>\n",
+       "      <th>19</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>SVC</td>\n",
+       "      <td>{'C': 0.1, 'probability': True, 'random_state'...</td>\n",
+       "      <td>[0.746551724137931, 0.6741379310344828, 0.6362...</td>\n",
+       "      <td>[0.7543103448275862, 0.6715517241379311, 0.634...</td>\n",
+       "      <td>0.650130</td>\n",
+       "      <td>0.065829</td>\n",
+       "      <td>0.437069</td>\n",
+       "      <td>0.850862</td>\n",
+       "      <td>0.650833</td>\n",
+       "      <td>0.065979</td>\n",
+       "      <td>0.434483</td>\n",
+       "      <td>0.850862</td>\n",
+       "      <td>0.001734</td>\n",
+       "      <td>0.998266</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                           name  \\\n",
+       "classifier                        \n",
+       "DecisionTreeClassifier 0   bupa   \n",
+       "KNeighborsClassifier   4   bupa   \n",
+       "RandomForestClassifier 10  bupa   \n",
+       "SVC                    19  bupa   \n",
+       "\n",
+       "                                                                      sparam  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "KNeighborsClassifier   4   {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "RandomForestClassifier 10  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "SVC                    19  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "                                       classifier  \\\n",
+       "classifier                                          \n",
+       "DecisionTreeClassifier 0   DecisionTreeClassifier   \n",
+       "KNeighborsClassifier   4     KNeighborsClassifier   \n",
+       "RandomForestClassifier 10  RandomForestClassifier   \n",
+       "SVC                    19                     SVC   \n",
+       "\n",
+       "                                                                      cparam  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0                 {'max_depth': 4, 'random_state': 5}   \n",
+       "KNeighborsClassifier   4                                  {'n_neighbors': 3}   \n",
+       "RandomForestClassifier 10                {'max_depth': 8, 'random_state': 5}   \n",
+       "SVC                    19  {'C': 0.1, 'probability': True, 'random_state'...   \n",
+       "\n",
+       "                                                                     auc_det  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   [0.6857758620689656, 0.5900862068965518, 0.638...   \n",
+       "KNeighborsClassifier   4   [0.6133620689655173, 0.6340517241379311, 0.630...   \n",
+       "RandomForestClassifier 10  [0.8543103448275862, 0.7698275862068966, 0.755...   \n",
+       "SVC                    19  [0.746551724137931, 0.6741379310344828, 0.6362...   \n",
+       "\n",
+       "                                                                         auc  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   [0.7375, 0.6668103448275862, 0.7, 0.6504310344...   \n",
+       "KNeighborsClassifier   4   [0.6051724137931035, 0.6189655172413793, 0.637...   \n",
+       "RandomForestClassifier 10  [0.8551724137931035, 0.7422413793103448, 0.755...   \n",
+       "SVC                    19  [0.7543103448275862, 0.6715517241379311, 0.634...   \n",
+       "\n",
+       "                           auc_mean_det  auc_std_det  auc_min_det  \\\n",
+       "classifier                                                          \n",
+       "DecisionTreeClassifier 0       0.661172     0.060384     0.458190   \n",
+       "KNeighborsClassifier   4       0.659195     0.055252     0.482759   \n",
+       "RandomForestClassifier 10      0.762595     0.052691     0.603448   \n",
+       "SVC                    19      0.650130     0.065829     0.437069   \n",
+       "\n",
+       "                           auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "classifier                                                             \n",
+       "DecisionTreeClassifier 0      0.855603  0.661109  0.059722  0.475000   \n",
+       "KNeighborsClassifier   4      0.853448  0.658767  0.055262  0.492241   \n",
+       "RandomForestClassifier 10     0.928448  0.764598  0.052473  0.606034   \n",
+       "SVC                    19     0.850862  0.650833  0.065979  0.434483   \n",
+       "\n",
+       "                            auc_max       p_l       p_g    f_l    f_g  \n",
+       "classifier                                                             \n",
+       "DecisionTreeClassifier 0   0.833621  0.443313  0.556687  False  False  \n",
+       "KNeighborsClassifier   4   0.853448  0.665874  0.334126  False  False  \n",
+       "RandomForestClassifier 10  0.923276  0.002484  0.997516   True  False  \n",
+       "SVC                    19  0.850862  0.001734  0.998266   True  False  "
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged.groupby('classifier').apply(lambda pdf: pdf[pdf['auc_mean_det'] == pdf['auc_mean_det'].max()])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>[0.8543103448275862, 0.7698275862068966, 0.755...</td>\n",
+       "      <td>[0.8551724137931035, 0.7422413793103448, 0.755...</td>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.603448</td>\n",
+       "      <td>0.928448</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.606034</td>\n",
+       "      <td>0.923276</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    name                                             sparam  \\\n",
+       "10  bupa  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "                classifier                               cparam  \\\n",
+       "10  RandomForestClassifier  {'max_depth': 8, 'random_state': 5}   \n",
+       "\n",
+       "                                              auc_det  \\\n",
+       "10  [0.8543103448275862, 0.7698275862068966, 0.755...   \n",
+       "\n",
+       "                                                  auc  auc_mean_det  \\\n",
+       "10  [0.8551724137931035, 0.7422413793103448, 0.755...      0.762595   \n",
+       "\n",
+       "    auc_std_det  auc_min_det  auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "10     0.052691     0.603448     0.928448  0.764598  0.052473  0.606034   \n",
+       "\n",
+       "     auc_max       p_l       p_g   f_l    f_g  \n",
+       "10  0.923276  0.002484  0.997516  True  False  "
+      ]
+     },
+     "execution_count": 75,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[merged['auc_mean_det'] == merged['auc_mean_det'].max()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>classifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>DecisionTreeClassifier</th>\n",
+       "      <th>0</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>[0.6857758620689656, 0.5900862068965518, 0.638...</td>\n",
+       "      <td>[0.7375, 0.6668103448275862, 0.7, 0.6504310344...</td>\n",
+       "      <td>0.661172</td>\n",
+       "      <td>0.060384</td>\n",
+       "      <td>0.458190</td>\n",
+       "      <td>0.855603</td>\n",
+       "      <td>0.661109</td>\n",
+       "      <td>0.059722</td>\n",
+       "      <td>0.475000</td>\n",
+       "      <td>0.833621</td>\n",
+       "      <td>0.443313</td>\n",
+       "      <td>0.556687</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>KNeighborsClassifier</th>\n",
+       "      <th>4</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>{'n_neighbors': 3}</td>\n",
+       "      <td>[0.6133620689655173, 0.6340517241379311, 0.630...</td>\n",
+       "      <td>[0.6051724137931035, 0.6189655172413793, 0.637...</td>\n",
+       "      <td>0.659195</td>\n",
+       "      <td>0.055252</td>\n",
+       "      <td>0.482759</td>\n",
+       "      <td>0.853448</td>\n",
+       "      <td>0.658767</td>\n",
+       "      <td>0.055262</td>\n",
+       "      <td>0.492241</td>\n",
+       "      <td>0.853448</td>\n",
+       "      <td>0.665874</td>\n",
+       "      <td>0.334126</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>RandomForestClassifier</th>\n",
+       "      <th>10</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>[0.8543103448275862, 0.7698275862068966, 0.755...</td>\n",
+       "      <td>[0.8551724137931035, 0.7422413793103448, 0.755...</td>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.603448</td>\n",
+       "      <td>0.928448</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.606034</td>\n",
+       "      <td>0.923276</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SVC</th>\n",
+       "      <th>19</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>SVC</td>\n",
+       "      <td>{'C': 0.1, 'probability': True, 'random_state'...</td>\n",
+       "      <td>[0.746551724137931, 0.6741379310344828, 0.6362...</td>\n",
+       "      <td>[0.7543103448275862, 0.6715517241379311, 0.634...</td>\n",
+       "      <td>0.650130</td>\n",
+       "      <td>0.065829</td>\n",
+       "      <td>0.437069</td>\n",
+       "      <td>0.850862</td>\n",
+       "      <td>0.650833</td>\n",
+       "      <td>0.065979</td>\n",
+       "      <td>0.434483</td>\n",
+       "      <td>0.850862</td>\n",
+       "      <td>0.001734</td>\n",
+       "      <td>0.998266</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                           name  \\\n",
+       "classifier                        \n",
+       "DecisionTreeClassifier 0   bupa   \n",
+       "KNeighborsClassifier   4   bupa   \n",
+       "RandomForestClassifier 10  bupa   \n",
+       "SVC                    19  bupa   \n",
+       "\n",
+       "                                                                      sparam  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "KNeighborsClassifier   4   {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "RandomForestClassifier 10  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "SVC                    19  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "                                       classifier  \\\n",
+       "classifier                                          \n",
+       "DecisionTreeClassifier 0   DecisionTreeClassifier   \n",
+       "KNeighborsClassifier   4     KNeighborsClassifier   \n",
+       "RandomForestClassifier 10  RandomForestClassifier   \n",
+       "SVC                    19                     SVC   \n",
+       "\n",
+       "                                                                      cparam  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0                 {'max_depth': 4, 'random_state': 5}   \n",
+       "KNeighborsClassifier   4                                  {'n_neighbors': 3}   \n",
+       "RandomForestClassifier 10                {'max_depth': 8, 'random_state': 5}   \n",
+       "SVC                    19  {'C': 0.1, 'probability': True, 'random_state'...   \n",
+       "\n",
+       "                                                                     auc_det  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   [0.6857758620689656, 0.5900862068965518, 0.638...   \n",
+       "KNeighborsClassifier   4   [0.6133620689655173, 0.6340517241379311, 0.630...   \n",
+       "RandomForestClassifier 10  [0.8543103448275862, 0.7698275862068966, 0.755...   \n",
+       "SVC                    19  [0.746551724137931, 0.6741379310344828, 0.6362...   \n",
+       "\n",
+       "                                                                         auc  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   [0.7375, 0.6668103448275862, 0.7, 0.6504310344...   \n",
+       "KNeighborsClassifier   4   [0.6051724137931035, 0.6189655172413793, 0.637...   \n",
+       "RandomForestClassifier 10  [0.8551724137931035, 0.7422413793103448, 0.755...   \n",
+       "SVC                    19  [0.7543103448275862, 0.6715517241379311, 0.634...   \n",
+       "\n",
+       "                           auc_mean_det  auc_std_det  auc_min_det  \\\n",
+       "classifier                                                          \n",
+       "DecisionTreeClassifier 0       0.661172     0.060384     0.458190   \n",
+       "KNeighborsClassifier   4       0.659195     0.055252     0.482759   \n",
+       "RandomForestClassifier 10      0.762595     0.052691     0.603448   \n",
+       "SVC                    19      0.650130     0.065829     0.437069   \n",
+       "\n",
+       "                           auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "classifier                                                             \n",
+       "DecisionTreeClassifier 0      0.855603  0.661109  0.059722  0.475000   \n",
+       "KNeighborsClassifier   4      0.853448  0.658767  0.055262  0.492241   \n",
+       "RandomForestClassifier 10     0.928448  0.764598  0.052473  0.606034   \n",
+       "SVC                    19     0.850862  0.650833  0.065979  0.434483   \n",
+       "\n",
+       "                            auc_max       p_l       p_g    f_l    f_g  \n",
+       "classifier                                                             \n",
+       "DecisionTreeClassifier 0   0.833621  0.443313  0.556687  False  False  \n",
+       "KNeighborsClassifier   4   0.853448  0.665874  0.334126  False  False  \n",
+       "RandomForestClassifier 10  0.923276  0.002484  0.997516   True  False  \n",
+       "SVC                    19  0.850862  0.001734  0.998266   True  False  "
+      ]
+     },
+     "execution_count": 76,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged.groupby('classifier').apply(lambda pdf: pdf[pdf['auc_mean'] == pdf['auc_mean'].max()])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"{'C': 0.1, 'probability': True, 'random_state': 5}\""
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tmp = merged.groupby('classifier').apply(lambda pdf: pdf[pdf['auc_mean'] == pdf['auc_mean'].max()])\n",
+    "tmp.iloc[-1]['cparam']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>[0.8543103448275862, 0.7698275862068966, 0.755...</td>\n",
+       "      <td>[0.8551724137931035, 0.7422413793103448, 0.755...</td>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.603448</td>\n",
+       "      <td>0.928448</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.606034</td>\n",
+       "      <td>0.923276</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    name                                             sparam  \\\n",
+       "10  bupa  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "                classifier                               cparam  \\\n",
+       "10  RandomForestClassifier  {'max_depth': 8, 'random_state': 5}   \n",
+       "\n",
+       "                                              auc_det  \\\n",
+       "10  [0.8543103448275862, 0.7698275862068966, 0.755...   \n",
+       "\n",
+       "                                                  auc  auc_mean_det  \\\n",
+       "10  [0.8551724137931035, 0.7422413793103448, 0.755...      0.762595   \n",
+       "\n",
+       "    auc_std_det  auc_min_det  auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "10     0.052691     0.603448     0.928448  0.764598  0.052473  0.606034   \n",
+       "\n",
+       "     auc_max       p_l       p_g   f_l    f_g  \n",
+       "10  0.923276  0.002484  0.997516  True  False  "
+      ]
+     },
+     "execution_count": 77,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[merged['auc_mean'] == merged['auc_mean'].max()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "auc_mean        0.610534\n",
+       "auc_std         0.052280\n",
+       "auc_mean_det    0.610252\n",
+       "auc_std_det     0.052778\n",
+       "p_l             0.405746\n",
+       "p_g             0.594254\n",
+       "f_l             0.250000\n",
+       "f_g             0.100000\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 78,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[['auc_mean', 'auc_std', 'auc_mean_det', 'auc_std_det', 'p_l', 'p_g', 'f_l', 'f_g']].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "auc_min        0.429138\n",
+       "auc_max        0.770366\n",
+       "auc_min_det    0.425280\n",
+       "auc_max_det    0.773621\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[['auc_min', 'auc_max', 'auc_min_det', 'auc_max_det']].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "auc_mean        0.630787\n",
+       "auc_std         0.057362\n",
+       "auc_mean_det    0.630810\n",
+       "auc_std_det     0.057514\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 80,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[['auc_mean', 'auc_std', 'auc_mean_det', 'auc_std_det']].median()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smote-variants",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/development/034-test-regularization.ipynb b/notebooks/development/034-test-regularization.ipynb
new file mode 100644
index 0000000..5b61d45
--- /dev/null
+++ b/notebooks/development/034-test-regularization.ipynb
@@ -0,0 +1,279 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "\n",
+    "from joblib import Parallel, delayed\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "import tqdm\n",
+    "\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.svm import SVC\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.model_selection import RepeatedStratifiedKFold\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.metrics import roc_auc_score\n",
+    "\n",
+    "from smote_variants.oversampling import SMOTE, NoSMOTE, ADASYN, Borderline_SMOTE1, ProWSyn, SMOTE_IPF, Lee, SMOBD\n",
+    "from common_datasets.binary_classification import get_filtered_data_loaders\n",
+    "import common_datasets.binary_classification as binclas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "logger = logging.getLogger('smote_variants')\n",
+    "logger.setLevel(logging.ERROR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "classifiers = {\n",
+    "DecisionTreeClassifier: [{'max_depth': md, 'random_state': 5} for md in [1, 2] + list(range(3, 18, 2))],\n",
+    "RandomForestClassifier: [{'max_depth': md, 'random_state': 5, 'n_jobs': 1} for md in [1, 2] + list(range(3, 18, 2))],\n",
+    "KNeighborsClassifier: [{'n_neighbors': nn, 'n_jobs': 1} for nn in range(1, 70, 4)],\n",
+    "SVC: [{'C': c, 'probability': True, 'random_state': 5} for c in [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0]]\\\n",
+    "            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 2, 'random_state': 5} for c in [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0]]\\\n",
+    "            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 3, 'random_state': 5} for c in [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0]]\\\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datasets = get_filtered_data_loaders(n_col_bounds=(2, 50),\n",
+    "                                        n_bounds=(10, 700),\n",
+    "                                        n_minority_bounds=(10, 500),\n",
+    "                                        n_from_phenotypes=1,\n",
+    "                                        n_smallest=40)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datasets = [loader for loader in datasets if loader not in [binclas.load_iris0, binclas.load_dermatology_6, binclas.load_shuttle_6_vs_2_3, binclas.load_monk_2, binclas.load_new_thyroid1]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "20"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(datasets)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "oversampler_classes = [SMOTE, Borderline_SMOTE1, ADASYN, ProWSyn, SMOTE_IPF, Lee, SMOBD, NoSMOTE]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "oversamplers = {}\n",
+    "for oversampler in oversampler_classes:\n",
+    "    random_state = np.random.RandomState(5)\n",
+    "    params = oversampler.parameter_combinations()\n",
+    "    params = [comb for comb in params if comb.get('proportion', 1.0) == 1.0]\n",
+    "    n_params = min(10, len(params))\n",
+    "    oversamplers[oversampler] = random_state.choice(params, n_params, replace=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def job_generator(data_loader):\n",
+    "\n",
+    "    dataset = data_loader()\n",
+    "\n",
+    "    X = dataset['data']\n",
+    "    y = dataset['target']\n",
+    "\n",
+    "    validator = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=5)\n",
+    "\n",
+    "    for fidx, (train, test) in enumerate(validator.split(X, y, y)):\n",
+    "        X_train = X[train]\n",
+    "        X_test = X[test]\n",
+    "        y_train = y[train]\n",
+    "        y_test = y[test]\n",
+    "\n",
+    "        ss = StandardScaler()\n",
+    "        ss.fit(X_train)\n",
+    "        X_train = ss.transform(X_train)\n",
+    "        X_test = ss.transform(X_test)\n",
+    "\n",
+    "        for oversampler, oparam in oversamplers.items():\n",
+    "            for sparam in oparam:\n",
+    "                oversampling = oversampler(**sparam)\n",
+    "                X_samp, y_samp = oversampling.sample(X_train, y_train)\n",
+    "\n",
+    "                job = {\n",
+    "                    'X_samp': X_samp,\n",
+    "                    'y_samp': y_samp,\n",
+    "                    'X_test': X_test,\n",
+    "                    'y_test': y_test,\n",
+    "                }\n",
+    "\n",
+    "                description = {\n",
+    "                    'name': dataset['name'],\n",
+    "                    'fold': fidx,\n",
+    "                    'oversampler': oversampler.__name__,\n",
+    "                    'sparam': sparam,\n",
+    "                }\n",
+    "\n",
+    "                yield job, description"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def do_job(job, description):\n",
+    "    results = []\n",
+    "    for classifier, cparams in classifiers.items():\n",
+    "        for cparam in cparams:\n",
+    "            tmp = description.copy()\n",
+    "            classifier_obj = classifier(**cparam)\n",
+    "            classifier_obj.fit(job['X_samp'], job['y_samp'])\n",
+    "            y_pred = classifier_obj.predict_proba(job['X_test'])\n",
+    "            auc = roc_auc_score(job['y_test'], y_pred[:, 1])\n",
+    "\n",
+    "            tmp['classifier'] = classifier.__name__\n",
+    "            tmp['cparam'] = cparam\n",
+    "            tmp['auc'] = auc\n",
+    "            results.append(tmp)\n",
+    "\n",
+    "    return results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-12-16 17:39:43.203396 appendicitis\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "1it [00:00,  6.81it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-12-16 17:39:43.262076 appendicitis\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "5it [00:00, 10.71it/s]"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "620it [08:17,  1.25it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "for data_loader in datasets:\n",
+    "    if data_loader != binclas.load_appendicitis:\n",
+    "        continue\n",
+    "    dataset = data_loader()\n",
+    "\n",
+    "    print(datetime.datetime.now(), dataset['name'])\n",
+    "\n",
+    "    results = Parallel(n_jobs=3)(delayed(do_job)(*x) for x in tqdm.tqdm(job_generator(data_loader)))\n",
+    "\n",
+    "    results = [\n",
+    "        x\n",
+    "        for xs in results\n",
+    "        for x in xs\n",
+    "    ]\n",
+    "\n",
+    "    results = pd.DataFrame.from_dict(results)\n",
+    "    results.to_csv(f\"{dataset['name']}-reg.csv\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smote-variants",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/development/034-test-regularization.py b/notebooks/development/034-test-regularization.py
new file mode 100644
index 0000000..dcc8219
--- /dev/null
+++ b/notebooks/development/034-test-regularization.py
@@ -0,0 +1,142 @@
+# %%
+import datetime
+
+from joblib import Parallel, delayed
+
+import numpy as np
+import pandas as pd
+
+import tqdm
+
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.svm import SVC
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.model_selection import RepeatedStratifiedKFold
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import roc_auc_score
+
+from smote_variants.oversampling import SMOTE, NoSMOTE, ADASYN, Borderline_SMOTE1, ProWSyn, SMOTE_IPF, Lee, SMOBD
+from common_datasets.binary_classification import get_filtered_data_loaders
+import common_datasets.binary_classification as binclas
+
+# %%
+import logging
+logger = logging.getLogger('smote_variants')
+logger.setLevel(logging.ERROR)
+
+# %%
+classifiers = {
+DecisionTreeClassifier: [{'max_depth': md, 'random_state': 5} for md in [1, 2] + list(range(3, 18, 2))],
+RandomForestClassifier: [{'max_depth': md, 'random_state': 5, 'n_jobs': 1} for md in [1, 2] + list(range(3, 18, 2))],
+KNeighborsClassifier: [{'n_neighbors': nn, 'n_jobs': 1} for nn in range(1, 70, 4)],
+SVC: [{'C': c, 'probability': True, 'random_state': 5} for c in [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0]]\
+            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 2, 'random_state': 5} for c in [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0]]\
+            + [{'C': c, 'probability': True, 'kernel': 'poly', 'degree': 3, 'random_state': 5} for c in [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0]]\
+}
+
+# %%
+datasets = get_filtered_data_loaders(n_col_bounds=(2, 50),
+                                        n_bounds=(10, 700),
+                                        n_minority_bounds=(10, 500),
+                                        n_from_phenotypes=1,
+                                        n_smallest=40)
+
+# %%
+datasets = [loader for loader in datasets if loader not in [binclas.load_iris0, binclas.load_dermatology_6, binclas.load_shuttle_6_vs_2_3, binclas.load_monk_2, binclas.load_new_thyroid1]]
+
+# %%
+len(datasets)
+
+# %%
+oversampler_classes = [SMOTE, Borderline_SMOTE1, ADASYN, ProWSyn, SMOTE_IPF, Lee, SMOBD, NoSMOTE]
+
+# %%
+oversamplers = {}
+for oversampler in oversampler_classes:
+    random_state = np.random.RandomState(5)
+    params = oversampler.parameter_combinations()
+    params = [comb for comb in params if comb.get('proportion', 1.0) == 1.0]
+    n_params = min(10, len(params))
+    oversamplers[oversampler] = random_state.choice(params, n_params, replace=False)
+
+# %%
+def job_generator(data_loader):
+
+    dataset = data_loader()
+
+    X = dataset['data']
+    y = dataset['target']
+
+    validator = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=5)
+
+    for fidx, (train, test) in enumerate(validator.split(X, y, y)):
+        X_train = X[train]
+        X_test = X[test]
+        y_train = y[train]
+        y_test = y[test]
+
+        ss = StandardScaler()
+        ss.fit(X_train)
+        X_train = ss.transform(X_train)
+        X_test = ss.transform(X_test)
+
+        for oversampler, oparam in oversamplers.items():
+            for sparam in oparam:
+                oversampling = oversampler(**sparam)
+                X_samp, y_samp = oversampling.sample(X_train, y_train)
+
+                job = {
+                    'X_samp': X_samp,
+                    'y_samp': y_samp,
+                    'X_test': X_test,
+                    'y_test': y_test,
+                }
+
+                description = {
+                    'name': dataset['name'],
+                    'fold': fidx,
+                    'oversampler': oversampler.__name__,
+                    'sparam': sparam,
+                }
+
+                yield job, description
+
+# %%
+def do_job(job, description):
+    results = []
+    for classifier, cparams in classifiers.items():
+        for cparam in cparams:
+            tmp = description.copy()
+            classifier_obj = classifier(**cparam)
+            classifier_obj.fit(job['X_samp'], job['y_samp'])
+            y_pred = classifier_obj.predict_proba(job['X_test'])
+            auc = roc_auc_score(job['y_test'], y_pred[:, 1])
+
+            tmp['classifier'] = classifier.__name__
+            tmp['cparam'] = cparam
+            tmp['auc'] = auc
+            results.append(tmp)
+
+    return results
+
+# %%
+for data_loader in datasets:
+    if data_loader != binclas.load_appendicitis:
+        continue
+    dataset = data_loader()
+
+    print(datetime.datetime.now(), dataset['name'])
+
+    results = Parallel(n_jobs=3)(delayed(do_job)(*x) for x in tqdm.tqdm(job_generator(data_loader)))
+
+    results = [
+        x
+        for xs in results
+        for x in xs
+    ]
+
+    results = pd.DataFrame.from_dict(results)
+    results.to_csv(f"{dataset['name']}-reg.csv")
+
+
diff --git a/notebooks/development/035-analysis-regularization.ipynb b/notebooks/development/035-analysis-regularization.ipynb
new file mode 100644
index 0000000..4482a42
--- /dev/null
+++ b/notebooks/development/035-analysis-regularization.ipynb
@@ -0,0 +1,1670 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy.stats import wilcoxon\n",
+    "import common_datasets.binary_classification as binclas\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('appendicitis-reg.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grouped = data\\\n",
+    "    .groupby(['name', 'oversampler', 'sparam', 'classifier', 'cparam'])\\\n",
+    "    .apply(lambda pdf: pd.Series({'auc': pdf.sort_values('fold')['auc'].values.tolist()}))\\\n",
+    "    .reset_index(drop=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grouped['auc_mean'] = grouped['auc'].apply(np.mean)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_reg_param(row):\n",
+    "    if row['classifier'] == 'SVC':\n",
+    "        return eval(row['cparam'])['C']\n",
+    "    if row['classifier'] == 'DecisionTreeClassifier':\n",
+    "        return eval(row['cparam'])['max_depth']\n",
+    "    if row['classifier'] == 'RandomForestClassifier':\n",
+    "        return eval(row['cparam'])['max_depth']\n",
+    "    if row['classifier'] == 'KNeighborsClassifier':\n",
+    "        return eval(row['cparam'])['n_neighbors']\n",
+    "\n",
+    "def extract_classifier_subparam(row):\n",
+    "    if row['classifier'] == 'SVC':\n",
+    "        kernel = eval(row['cparam']).get('kernel', 'rbf')\n",
+    "        degree = str(eval(row['cparam']).get('degree', ''))\n",
+    "        return kernel + degree\n",
+    "    return ''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grouped['reg_param'] = grouped.apply(extract_reg_param, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   classifier kernel  reg_param  auc_mean  auc_mean_smote\n",
+      "0         SVC  poly2      0.001  0.711359        0.250122\n",
+      "3         SVC  poly2      0.002  0.713850        0.250703\n",
+      "6         SVC  poly2      0.005  0.713638        0.261456\n",
+      "9         SVC  poly2      0.010  0.710585        0.657913\n",
+      "12        SVC  poly2      0.020  0.710629        0.746131\n",
+      "15        SVC  poly2      0.050  0.710712        0.750371\n",
+      "18        SVC  poly2      0.100  0.713918        0.751929\n",
+      "21        SVC  poly2      0.200  0.723368        0.752891\n",
+      "24        SVC  poly2      0.500  0.725665        0.755069\n",
+      "27        SVC  poly2      1.000  0.720097        0.755713\n",
+      "30        SVC  poly2      2.000  0.718424        0.746403\n",
+      "   classifier kernel  reg_param  auc_mean  auc_mean_smote\n",
+      "1         SVC  poly3      0.001  0.821631        0.155966\n",
+      "4         SVC  poly3      0.002  0.821949        0.402381\n",
+      "7         SVC  poly3      0.005  0.820960        0.857509\n",
+      "10        SVC  poly3      0.010  0.820712        0.848819\n",
+      "13        SVC  poly3      0.020  0.819429        0.839507\n",
+      "16        SVC  poly3      0.050  0.809549        0.829846\n",
+      "19        SVC  poly3      0.100  0.800191        0.823304\n",
+      "22        SVC  poly3      0.200  0.792416        0.818324\n",
+      "25        SVC  poly3      0.500  0.784059        0.811646\n",
+      "28        SVC  poly3      1.000  0.778493        0.804654\n",
+      "31        SVC  poly3      2.000  0.762454        0.796621\n",
+      "   classifier kernel  reg_param  auc_mean  auc_mean_smote\n",
+      "2         SVC    rbf      0.001  0.800432        0.134971\n",
+      "5         SVC    rbf      0.002  0.794282        0.134971\n",
+      "8         SVC    rbf      0.005  0.794037        0.135968\n",
+      "11        SVC    rbf      0.010  0.795526        0.138422\n",
+      "14        SVC    rbf      0.020  0.795288        0.754365\n",
+      "17        SVC    rbf      0.050  0.795529        0.868762\n",
+      "20        SVC    rbf      0.100  0.795488        0.872091\n",
+      "23        SVC    rbf      0.200  0.795541        0.867499\n",
+      "26        SVC    rbf      0.500  0.791965        0.855222\n",
+      "29        SVC    rbf      1.000  0.797488        0.844791\n",
+      "32        SVC    rbf      2.000  0.813850        0.828971\n",
+      "               classifier kernel  reg_param  auc_mean  auc_mean_smote\n",
+      "4  DecisionTreeClassifier               3.0  0.750621        0.733844\n",
+      "5  DecisionTreeClassifier               5.0  0.737157        0.721297\n",
+      "6  DecisionTreeClassifier               7.0  0.716087        0.713525\n",
+      "7  DecisionTreeClassifier               9.0  0.713234        0.711046\n",
+      "0  DecisionTreeClassifier              11.0  0.712991        0.710782\n",
+      "1  DecisionTreeClassifier              13.0  0.712991        0.710768\n",
+      "2  DecisionTreeClassifier              15.0  0.712991        0.710768\n",
+      "3  DecisionTreeClassifier              17.0  0.712991        0.710768\n",
+      "               classifier kernel  reg_param  auc_mean  auc_mean_smote\n",
+      "4  RandomForestClassifier               3.0  0.838971        0.839129\n",
+      "5  RandomForestClassifier               5.0  0.828831        0.825497\n",
+      "6  RandomForestClassifier               7.0  0.825096        0.820869\n",
+      "7  RandomForestClassifier               9.0  0.824181        0.820987\n",
+      "0  RandomForestClassifier              11.0  0.824119        0.820993\n",
+      "1  RandomForestClassifier              13.0  0.824119        0.821001\n",
+      "2  RandomForestClassifier              15.0  0.824119        0.820988\n",
+      "3  RandomForestClassifier              17.0  0.824119        0.820988\n",
+      "              classifier kernel  reg_param  auc_mean  auc_mean_smote\n",
+      "2   KNeighborsClassifier               1.0  0.733643        0.709665\n",
+      "11  KNeighborsClassifier               5.0  0.779324        0.790846\n",
+      "12  KNeighborsClassifier               9.0  0.804013        0.809659\n",
+      "0   KNeighborsClassifier              13.0  0.815140        0.817610\n",
+      "1   KNeighborsClassifier              17.0  0.834174        0.827690\n",
+      "3   KNeighborsClassifier              21.0  0.830619        0.835022\n",
+      "4   KNeighborsClassifier              25.0  0.829724        0.834254\n",
+      "5   KNeighborsClassifier              29.0  0.843328        0.837459\n",
+      "6   KNeighborsClassifier              33.0  0.849118        0.842318\n",
+      "7   KNeighborsClassifier              37.0  0.849035        0.847653\n",
+      "8   KNeighborsClassifier              41.0  0.848688        0.851491\n",
+      "9   KNeighborsClassifier              45.0  0.841378        0.853685\n",
+      "10  KNeighborsClassifier              49.0  0.829066        0.854934\n"
+     ]
+    }
+   ],
+   "source": [
+    "for classifier in ['SVC', 'DecisionTreeClassifier', 'RandomForestClassifier', 'KNeighborsClassifier']:\n",
+    "    filtered = grouped[grouped['classifier'] == classifier]\n",
+    "    nosmote = filtered[filtered['oversampler'] == 'NoSMOTE']\n",
+    "    smote = filtered[filtered['oversampler'] == 'SMOTE']\n",
+    "    merged = pd.merge(nosmote,\n",
+    "                        smote[['reg_param', 'cparam', 'auc_mean']].rename(columns={'auc_mean': 'auc_mean_smote'}),\n",
+    "                        on=['reg_param', 'cparam'])\n",
+    "    merged['kernel'] = merged.apply(extract_classifier_subparam, axis=1)\n",
+    "\n",
+    "    kernels = merged['kernel'].drop_duplicates().values\n",
+    "\n",
+    "    for kernel in kernels:\n",
+    "        print(merged[merged['kernel'] == kernel].sort_values('reg_param')[['classifier', 'kernel', 'reg_param', 'auc_mean', 'auc_mean_smote']])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>reg_param</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_mean_smote</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.733643</td>\n",
+       "      <td>0.709665</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>0.779324</td>\n",
+       "      <td>0.790846</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>0.804013</td>\n",
+       "      <td>0.809659</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>13.0</td>\n",
+       "      <td>0.815140</td>\n",
+       "      <td>0.817610</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>0.834174</td>\n",
+       "      <td>0.827690</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>21.0</td>\n",
+       "      <td>0.830619</td>\n",
+       "      <td>0.835022</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>25.0</td>\n",
+       "      <td>0.829724</td>\n",
+       "      <td>0.834254</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>29.0</td>\n",
+       "      <td>0.843328</td>\n",
+       "      <td>0.837459</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>33.0</td>\n",
+       "      <td>0.849118</td>\n",
+       "      <td>0.842318</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>37.0</td>\n",
+       "      <td>0.849035</td>\n",
+       "      <td>0.847653</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>41.0</td>\n",
+       "      <td>0.848688</td>\n",
+       "      <td>0.851491</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>45.0</td>\n",
+       "      <td>0.841378</td>\n",
+       "      <td>0.853685</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>49.0</td>\n",
+       "      <td>0.829066</td>\n",
+       "      <td>0.854934</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "              classifier  reg_param  auc_mean  auc_mean_smote\n",
+       "2   KNeighborsClassifier        1.0  0.733643        0.709665\n",
+       "11  KNeighborsClassifier        5.0  0.779324        0.790846\n",
+       "12  KNeighborsClassifier        9.0  0.804013        0.809659\n",
+       "0   KNeighborsClassifier       13.0  0.815140        0.817610\n",
+       "1   KNeighborsClassifier       17.0  0.834174        0.827690\n",
+       "3   KNeighborsClassifier       21.0  0.830619        0.835022\n",
+       "4   KNeighborsClassifier       25.0  0.829724        0.834254\n",
+       "5   KNeighborsClassifier       29.0  0.843328        0.837459\n",
+       "6   KNeighborsClassifier       33.0  0.849118        0.842318\n",
+       "7   KNeighborsClassifier       37.0  0.849035        0.847653\n",
+       "8   KNeighborsClassifier       41.0  0.848688        0.851491\n",
+       "9   KNeighborsClassifier       45.0  0.841378        0.853685\n",
+       "10  KNeighborsClassifier       49.0  0.829066        0.854934"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[merged['kernel'] == ''].sort_values('reg_param')[['classifier', 'reg_param', 'auc_mean', 'auc_mean_smote']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data['reg_param'] = data.apply(extract_reg_param, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>name</th>\n",
+       "      <th>fold</th>\n",
+       "      <th>oversampler</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>reg_param</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SMOTE</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 3, 'random_state': 5}</td>\n",
+       "      <td>0.564706</td>\n",
+       "      <td>3.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SMOTE</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 5, 'random_state': 5}</td>\n",
+       "      <td>0.600000</td>\n",
+       "      <td>5.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SMOTE</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 7, 'random_state': 5}</td>\n",
+       "      <td>0.670588</td>\n",
+       "      <td>7.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SMOTE</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 9, 'random_state': 5}</td>\n",
+       "      <td>0.670588</td>\n",
+       "      <td>9.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>appendicitis</td>\n",
+       "      <td>0</td>\n",
+       "      <td>SMOTE</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 11, 'random_state': 5}</td>\n",
+       "      <td>0.670588</td>\n",
+       "      <td>11.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Unnamed: 0          name  fold oversampler  \\\n",
+       "0           0  appendicitis     0       SMOTE   \n",
+       "1           1  appendicitis     0       SMOTE   \n",
+       "2           2  appendicitis     0       SMOTE   \n",
+       "3           3  appendicitis     0       SMOTE   \n",
+       "4           4  appendicitis     0       SMOTE   \n",
+       "\n",
+       "                                              sparam              classifier  \\\n",
+       "0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...  DecisionTreeClassifier   \n",
+       "1  {'n_neighbors': 5, 'proportion': 1.0, 'random_...  DecisionTreeClassifier   \n",
+       "2  {'n_neighbors': 5, 'proportion': 1.0, 'random_...  DecisionTreeClassifier   \n",
+       "3  {'n_neighbors': 5, 'proportion': 1.0, 'random_...  DecisionTreeClassifier   \n",
+       "4  {'n_neighbors': 5, 'proportion': 1.0, 'random_...  DecisionTreeClassifier   \n",
+       "\n",
+       "                                 cparam       auc  reg_param  \n",
+       "0   {'max_depth': 3, 'random_state': 5}  0.564706        3.0  \n",
+       "1   {'max_depth': 5, 'random_state': 5}  0.600000        5.0  \n",
+       "2   {'max_depth': 7, 'random_state': 5}  0.670588        7.0  \n",
+       "3   {'max_depth': 9, 'random_state': 5}  0.670588        9.0  \n",
+       "4  {'max_depth': 11, 'random_state': 5}  0.670588       11.0  "
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data['sparam'] = data['sparam'].apply(eval)\n",
+    "data['cparam'] = data['cparam'].apply(eval)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def remove_key(dict, key):\n",
+    "    del dict[key]\n",
+    "    return dict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "'nn_params'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[60], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmetric\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msparam\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnn_params\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmetric_learning_method\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      2\u001b[0m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msparam\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msparam\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: remove_key(x, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnn_params\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m      3\u001b[0m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcparam\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcparam\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28mstr\u001b[39m)\n",
+      "File \u001b[0;32m~/anaconda3/envs/smote-variants/lib/python3.10/site-packages/pandas/core/series.py:4760\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m   4625\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m   4626\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m   4627\u001b[0m     func: AggFuncType,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   4632\u001b[0m     \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m   4633\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m   4634\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m   4635\u001b[0m \u001b[38;5;124;03m    Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m   4636\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   4751\u001b[0m \u001b[38;5;124;03m    dtype: float64\u001b[39;00m\n\u001b[1;32m   4752\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m   4753\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   4754\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4755\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4756\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4757\u001b[0m \u001b[43m        \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4758\u001b[0m \u001b[43m        \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4759\u001b[0m 
\u001b[43m        \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 4760\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/smote-variants/lib/python3.10/site-packages/pandas/core/apply.py:1207\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1204\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m   1206\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1207\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/smote-variants/lib/python3.10/site-packages/pandas/core/apply.py:1287\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1281\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m   1282\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m   1283\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m   1284\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m   1285\u001b[0m \u001b[38;5;66;03m#  Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m   1286\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1287\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1288\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m   1289\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m   1292\u001b[0m     \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m   1293\u001b[0m     \u001b[38;5;66;03m#  See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m   1294\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n",
+      "File \u001b[0;32m~/anaconda3/envs/smote-variants/lib/python3.10/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m    918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m    919\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/smote-variants/lib/python3.10/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m   1812\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m   1813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1815\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1816\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m   1817\u001b[0m         values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m   1818\u001b[0m     )\n",
+      "File \u001b[0;32mlib.pyx:2920\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
+      "Cell \u001b[0;32mIn[60], line 1\u001b[0m, in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[0;32m----> 1\u001b[0m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmetric\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msparam\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[43mx\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnn_params\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmetric_learning_method\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m      2\u001b[0m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msparam\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msparam\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: remove_key(x, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnn_params\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m      3\u001b[0m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcparam\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcparam\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28mstr\u001b[39m)\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'nn_params'"
+     ]
+    }
+   ],
+   "source": [
+    "data['metric'] = data['sparam'].apply(lambda x: x['nn_params']['metric_learning_method'])\n",
+    "data['sparam'] = data['sparam'].apply(lambda x: remove_key(x, 'nn_params'))\n",
+    "data['cparam'] = data['cparam'].apply(str)\n",
+    "data['sparam'] = data['sparam'].apply(str)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Unnamed: 0', 'name', 'fold', 'sparam', 'classifier', 'cparam', 'auc',\n",
+       "       'metric'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>name</th>\n",
+       "      <th>fold</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>metric</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>0.737500</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 6, 'random_state': 5}</td>\n",
+       "      <td>0.675431</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>0.681034</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>0.859483</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>bupa</td>\n",
+       "      <td>0</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 6, 'random_state': 5}</td>\n",
+       "      <td>0.858621</td>\n",
+       "      <td>id</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Unnamed: 0  name  fold                                             sparam  \\\n",
+       "0           0  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "1           1  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "2           2  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "3           3  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "4           4  bupa     0  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "               classifier                               cparam       auc  \\\n",
+       "0  DecisionTreeClassifier  {'max_depth': 4, 'random_state': 5}  0.737500   \n",
+       "1  DecisionTreeClassifier  {'max_depth': 6, 'random_state': 5}  0.675431   \n",
+       "2  DecisionTreeClassifier  {'max_depth': 8, 'random_state': 5}  0.681034   \n",
+       "3  RandomForestClassifier  {'max_depth': 4, 'random_state': 5}  0.859483   \n",
+       "4  RandomForestClassifier  {'max_depth': 6, 'random_state': 5}  0.858621   \n",
+       "\n",
+       "  metric  \n",
+       "0     id  \n",
+       "1     id  \n",
+       "2     id  \n",
+       "3     id  \n",
+       "4     id  "
+      ]
+     },
+     "execution_count": 68,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grouped = data.groupby(['name', 'sparam', 'classifier', 'cparam', 'metric']).apply(lambda pdf: pdf.sort_values('fold')['auc'].values.tolist())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grouped = grouped.reset_index(drop=False)\n",
+    "grouped = grouped.rename(columns={0: 'auc'})\n",
+    "determ = grouped[grouped['metric'] == 'MI_weighted'].drop(columns=['metric'])\n",
+    "rand = grouped[grouped['metric'] == 'id'].drop(columns=['metric'])\n",
+    "merged = pd.merge(determ.rename(columns={'auc': 'auc_det'}), rand, on=['name', 'sparam', 'classifier', 'cparam'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "merged['auc_mean_det'] = merged['auc_det'].apply(np.mean)\n",
+    "merged['auc_std_det'] = merged['auc_det'].apply(np.std)\n",
+    "merged['auc_min_det'] = merged['auc_det'].apply(np.min)\n",
+    "merged['auc_max_det'] = merged['auc_det'].apply(np.max)\n",
+    "merged['auc_mean'] = merged['auc'].apply(np.mean)\n",
+    "merged['auc_std'] = merged['auc'].apply(np.std)\n",
+    "merged['auc_min'] = merged['auc'].apply(np.min)\n",
+    "merged['auc_max'] = merged['auc'].apply(np.max)\n",
+    "merged['p_l'] = merged.apply(lambda row: wilcoxon(row['auc_det'], row['auc'], zero_method='zsplit', alternative='less').pvalue, axis=1)\n",
+    "merged['p_g'] = merged.apply(lambda row: wilcoxon(row['auc_det'], row['auc'], zero_method='zsplit', alternative='greater').pvalue, axis=1)\n",
+    "merged['f_l'] = merged['p_l'] < 0.05\n",
+    "merged['f_g'] = merged['p_g'] < 0.05"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def model_selection(pdf):\n",
+    "    max_det = pdf[pdf['auc_mean_det'] == pdf['auc_mean_det'].max()].iloc[0]\n",
+    "    max_ran = pdf[pdf['auc_mean'] == pdf['auc_mean'].max()].iloc[0]\n",
+    "    return pd.Series({'auc_mean_det': max_det['auc_mean_det'],\n",
+    "            'auc_mean': max_ran['auc_mean'],\n",
+    "            'auc_std_det': max_det['auc_std_det'],\n",
+    "            'auc_std': max_ran['auc_std'],\n",
+    "            'p_l': wilcoxon(max_det['auc_det'], max_ran['auc'], zero_method='zsplit', alternative='less').pvalue,\n",
+    "            'p_g': wilcoxon(max_det['auc_det'], max_ran['auc'], zero_method='zsplit', alternative='greater').pvalue,\n",
+    "            'auc_median_det': np.median(max_det['auc_det']),\n",
+    "            'auc_median': np.median(max_ran['auc'])})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>auc_median_det</th>\n",
+       "      <th>auc_median</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>classifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>DecisionTreeClassifier</th>\n",
+       "      <td>0.661172</td>\n",
+       "      <td>0.661109</td>\n",
+       "      <td>0.060384</td>\n",
+       "      <td>0.059722</td>\n",
+       "      <td>0.443313</td>\n",
+       "      <td>0.556687</td>\n",
+       "      <td>0.662931</td>\n",
+       "      <td>0.664224</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>KNeighborsClassifier</th>\n",
+       "      <td>0.659195</td>\n",
+       "      <td>0.658767</td>\n",
+       "      <td>0.055252</td>\n",
+       "      <td>0.055262</td>\n",
+       "      <td>0.665874</td>\n",
+       "      <td>0.334126</td>\n",
+       "      <td>0.661207</td>\n",
+       "      <td>0.657543</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>RandomForestClassifier</th>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>0.765517</td>\n",
+       "      <td>0.766810</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SVC</th>\n",
+       "      <td>0.650130</td>\n",
+       "      <td>0.650833</td>\n",
+       "      <td>0.065829</td>\n",
+       "      <td>0.065979</td>\n",
+       "      <td>0.001734</td>\n",
+       "      <td>0.998266</td>\n",
+       "      <td>0.651724</td>\n",
+       "      <td>0.652155</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        auc_mean_det  auc_mean  auc_std_det   auc_std  \\\n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier      0.661172  0.661109     0.060384  0.059722   \n",
+       "KNeighborsClassifier        0.659195  0.658767     0.055252  0.055262   \n",
+       "RandomForestClassifier      0.762595  0.764598     0.052691  0.052473   \n",
+       "SVC                         0.650130  0.650833     0.065829  0.065979   \n",
+       "\n",
+       "                             p_l       p_g  auc_median_det  auc_median  \n",
+       "classifier                                                              \n",
+       "DecisionTreeClassifier  0.443313  0.556687        0.662931    0.664224  \n",
+       "KNeighborsClassifier    0.665874  0.334126        0.661207    0.657543  \n",
+       "RandomForestClassifier  0.002484  0.997516        0.765517    0.766810  \n",
+       "SVC                     0.001734  0.998266        0.651724    0.652155  "
+      ]
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged.groupby('classifier').apply(model_selection)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>classifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>DecisionTreeClassifier</th>\n",
+       "      <th>0</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>[0.6857758620689656, 0.5900862068965518, 0.638...</td>\n",
+       "      <td>[0.7375, 0.6668103448275862, 0.7, 0.6504310344...</td>\n",
+       "      <td>0.661172</td>\n",
+       "      <td>0.060384</td>\n",
+       "      <td>0.458190</td>\n",
+       "      <td>0.855603</td>\n",
+       "      <td>0.661109</td>\n",
+       "      <td>0.059722</td>\n",
+       "      <td>0.475000</td>\n",
+       "      <td>0.833621</td>\n",
+       "      <td>0.443313</td>\n",
+       "      <td>0.556687</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>KNeighborsClassifier</th>\n",
+       "      <th>4</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>{'n_neighbors': 3}</td>\n",
+       "      <td>[0.6133620689655173, 0.6340517241379311, 0.630...</td>\n",
+       "      <td>[0.6051724137931035, 0.6189655172413793, 0.637...</td>\n",
+       "      <td>0.659195</td>\n",
+       "      <td>0.055252</td>\n",
+       "      <td>0.482759</td>\n",
+       "      <td>0.853448</td>\n",
+       "      <td>0.658767</td>\n",
+       "      <td>0.055262</td>\n",
+       "      <td>0.492241</td>\n",
+       "      <td>0.853448</td>\n",
+       "      <td>0.665874</td>\n",
+       "      <td>0.334126</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>RandomForestClassifier</th>\n",
+       "      <th>10</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>[0.8543103448275862, 0.7698275862068966, 0.755...</td>\n",
+       "      <td>[0.8551724137931035, 0.7422413793103448, 0.755...</td>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.603448</td>\n",
+       "      <td>0.928448</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.606034</td>\n",
+       "      <td>0.923276</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SVC</th>\n",
+       "      <th>19</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>SVC</td>\n",
+       "      <td>{'C': 0.1, 'probability': True, 'random_state'...</td>\n",
+       "      <td>[0.746551724137931, 0.6741379310344828, 0.6362...</td>\n",
+       "      <td>[0.7543103448275862, 0.6715517241379311, 0.634...</td>\n",
+       "      <td>0.650130</td>\n",
+       "      <td>0.065829</td>\n",
+       "      <td>0.437069</td>\n",
+       "      <td>0.850862</td>\n",
+       "      <td>0.650833</td>\n",
+       "      <td>0.065979</td>\n",
+       "      <td>0.434483</td>\n",
+       "      <td>0.850862</td>\n",
+       "      <td>0.001734</td>\n",
+       "      <td>0.998266</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                           name  \\\n",
+       "classifier                        \n",
+       "DecisionTreeClassifier 0   bupa   \n",
+       "KNeighborsClassifier   4   bupa   \n",
+       "RandomForestClassifier 10  bupa   \n",
+       "SVC                    19  bupa   \n",
+       "\n",
+       "                                                                      sparam  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "KNeighborsClassifier   4   {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "RandomForestClassifier 10  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "SVC                    19  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "                                       classifier  \\\n",
+       "classifier                                          \n",
+       "DecisionTreeClassifier 0   DecisionTreeClassifier   \n",
+       "KNeighborsClassifier   4     KNeighborsClassifier   \n",
+       "RandomForestClassifier 10  RandomForestClassifier   \n",
+       "SVC                    19                     SVC   \n",
+       "\n",
+       "                                                                      cparam  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0                 {'max_depth': 4, 'random_state': 5}   \n",
+       "KNeighborsClassifier   4                                  {'n_neighbors': 3}   \n",
+       "RandomForestClassifier 10                {'max_depth': 8, 'random_state': 5}   \n",
+       "SVC                    19  {'C': 0.1, 'probability': True, 'random_state'...   \n",
+       "\n",
+       "                                                                     auc_det  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   [0.6857758620689656, 0.5900862068965518, 0.638...   \n",
+       "KNeighborsClassifier   4   [0.6133620689655173, 0.6340517241379311, 0.630...   \n",
+       "RandomForestClassifier 10  [0.8543103448275862, 0.7698275862068966, 0.755...   \n",
+       "SVC                    19  [0.746551724137931, 0.6741379310344828, 0.6362...   \n",
+       "\n",
+       "                                                                         auc  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   [0.7375, 0.6668103448275862, 0.7, 0.6504310344...   \n",
+       "KNeighborsClassifier   4   [0.6051724137931035, 0.6189655172413793, 0.637...   \n",
+       "RandomForestClassifier 10  [0.8551724137931035, 0.7422413793103448, 0.755...   \n",
+       "SVC                    19  [0.7543103448275862, 0.6715517241379311, 0.634...   \n",
+       "\n",
+       "                           auc_mean_det  auc_std_det  auc_min_det  \\\n",
+       "classifier                                                          \n",
+       "DecisionTreeClassifier 0       0.661172     0.060384     0.458190   \n",
+       "KNeighborsClassifier   4       0.659195     0.055252     0.482759   \n",
+       "RandomForestClassifier 10      0.762595     0.052691     0.603448   \n",
+       "SVC                    19      0.650130     0.065829     0.437069   \n",
+       "\n",
+       "                           auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "classifier                                                             \n",
+       "DecisionTreeClassifier 0      0.855603  0.661109  0.059722  0.475000   \n",
+       "KNeighborsClassifier   4      0.853448  0.658767  0.055262  0.492241   \n",
+       "RandomForestClassifier 10     0.928448  0.764598  0.052473  0.606034   \n",
+       "SVC                    19     0.850862  0.650833  0.065979  0.434483   \n",
+       "\n",
+       "                            auc_max       p_l       p_g    f_l    f_g  \n",
+       "classifier                                                             \n",
+       "DecisionTreeClassifier 0   0.833621  0.443313  0.556687  False  False  \n",
+       "KNeighborsClassifier   4   0.853448  0.665874  0.334126  False  False  \n",
+       "RandomForestClassifier 10  0.923276  0.002484  0.997516   True  False  \n",
+       "SVC                    19  0.850862  0.001734  0.998266   True  False  "
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged.groupby('classifier').apply(lambda pdf: pdf[pdf['auc_mean_det'] == pdf['auc_mean_det'].max()])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>[0.8543103448275862, 0.7698275862068966, 0.755...</td>\n",
+       "      <td>[0.8551724137931035, 0.7422413793103448, 0.755...</td>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.603448</td>\n",
+       "      <td>0.928448</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.606034</td>\n",
+       "      <td>0.923276</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    name                                             sparam  \\\n",
+       "10  bupa  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "                classifier                               cparam  \\\n",
+       "10  RandomForestClassifier  {'max_depth': 8, 'random_state': 5}   \n",
+       "\n",
+       "                                              auc_det  \\\n",
+       "10  [0.8543103448275862, 0.7698275862068966, 0.755...   \n",
+       "\n",
+       "                                                  auc  auc_mean_det  \\\n",
+       "10  [0.8551724137931035, 0.7422413793103448, 0.755...      0.762595   \n",
+       "\n",
+       "    auc_std_det  auc_min_det  auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "10     0.052691     0.603448     0.928448  0.764598  0.052473  0.606034   \n",
+       "\n",
+       "     auc_max       p_l       p_g   f_l    f_g  \n",
+       "10  0.923276  0.002484  0.997516  True  False  "
+      ]
+     },
+     "execution_count": 75,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[merged['auc_mean_det'] == merged['auc_mean_det'].max()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>classifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>DecisionTreeClassifier</th>\n",
+       "      <th>0</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>DecisionTreeClassifier</td>\n",
+       "      <td>{'max_depth': 4, 'random_state': 5}</td>\n",
+       "      <td>[0.6857758620689656, 0.5900862068965518, 0.638...</td>\n",
+       "      <td>[0.7375, 0.6668103448275862, 0.7, 0.6504310344...</td>\n",
+       "      <td>0.661172</td>\n",
+       "      <td>0.060384</td>\n",
+       "      <td>0.458190</td>\n",
+       "      <td>0.855603</td>\n",
+       "      <td>0.661109</td>\n",
+       "      <td>0.059722</td>\n",
+       "      <td>0.475000</td>\n",
+       "      <td>0.833621</td>\n",
+       "      <td>0.443313</td>\n",
+       "      <td>0.556687</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>KNeighborsClassifier</th>\n",
+       "      <th>4</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>KNeighborsClassifier</td>\n",
+       "      <td>{'n_neighbors': 3}</td>\n",
+       "      <td>[0.6133620689655173, 0.6340517241379311, 0.630...</td>\n",
+       "      <td>[0.6051724137931035, 0.6189655172413793, 0.637...</td>\n",
+       "      <td>0.659195</td>\n",
+       "      <td>0.055252</td>\n",
+       "      <td>0.482759</td>\n",
+       "      <td>0.853448</td>\n",
+       "      <td>0.658767</td>\n",
+       "      <td>0.055262</td>\n",
+       "      <td>0.492241</td>\n",
+       "      <td>0.853448</td>\n",
+       "      <td>0.665874</td>\n",
+       "      <td>0.334126</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>RandomForestClassifier</th>\n",
+       "      <th>10</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>[0.8543103448275862, 0.7698275862068966, 0.755...</td>\n",
+       "      <td>[0.8551724137931035, 0.7422413793103448, 0.755...</td>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.603448</td>\n",
+       "      <td>0.928448</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.606034</td>\n",
+       "      <td>0.923276</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SVC</th>\n",
+       "      <th>19</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>SVC</td>\n",
+       "      <td>{'C': 0.1, 'probability': True, 'random_state'...</td>\n",
+       "      <td>[0.746551724137931, 0.6741379310344828, 0.6362...</td>\n",
+       "      <td>[0.7543103448275862, 0.6715517241379311, 0.634...</td>\n",
+       "      <td>0.650130</td>\n",
+       "      <td>0.065829</td>\n",
+       "      <td>0.437069</td>\n",
+       "      <td>0.850862</td>\n",
+       "      <td>0.650833</td>\n",
+       "      <td>0.065979</td>\n",
+       "      <td>0.434483</td>\n",
+       "      <td>0.850862</td>\n",
+       "      <td>0.001734</td>\n",
+       "      <td>0.998266</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                           name  \\\n",
+       "classifier                        \n",
+       "DecisionTreeClassifier 0   bupa   \n",
+       "KNeighborsClassifier   4   bupa   \n",
+       "RandomForestClassifier 10  bupa   \n",
+       "SVC                    19  bupa   \n",
+       "\n",
+       "                                                                      sparam  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "KNeighborsClassifier   4   {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "RandomForestClassifier 10  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "SVC                    19  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "                                       classifier  \\\n",
+       "classifier                                          \n",
+       "DecisionTreeClassifier 0   DecisionTreeClassifier   \n",
+       "KNeighborsClassifier   4     KNeighborsClassifier   \n",
+       "RandomForestClassifier 10  RandomForestClassifier   \n",
+       "SVC                    19                     SVC   \n",
+       "\n",
+       "                                                                      cparam  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0                 {'max_depth': 4, 'random_state': 5}   \n",
+       "KNeighborsClassifier   4                                  {'n_neighbors': 3}   \n",
+       "RandomForestClassifier 10                {'max_depth': 8, 'random_state': 5}   \n",
+       "SVC                    19  {'C': 0.1, 'probability': True, 'random_state'...   \n",
+       "\n",
+       "                                                                     auc_det  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   [0.6857758620689656, 0.5900862068965518, 0.638...   \n",
+       "KNeighborsClassifier   4   [0.6133620689655173, 0.6340517241379311, 0.630...   \n",
+       "RandomForestClassifier 10  [0.8543103448275862, 0.7698275862068966, 0.755...   \n",
+       "SVC                    19  [0.746551724137931, 0.6741379310344828, 0.6362...   \n",
+       "\n",
+       "                                                                         auc  \\\n",
+       "classifier                                                                     \n",
+       "DecisionTreeClassifier 0   [0.7375, 0.6668103448275862, 0.7, 0.6504310344...   \n",
+       "KNeighborsClassifier   4   [0.6051724137931035, 0.6189655172413793, 0.637...   \n",
+       "RandomForestClassifier 10  [0.8551724137931035, 0.7422413793103448, 0.755...   \n",
+       "SVC                    19  [0.7543103448275862, 0.6715517241379311, 0.634...   \n",
+       "\n",
+       "                           auc_mean_det  auc_std_det  auc_min_det  \\\n",
+       "classifier                                                          \n",
+       "DecisionTreeClassifier 0       0.661172     0.060384     0.458190   \n",
+       "KNeighborsClassifier   4       0.659195     0.055252     0.482759   \n",
+       "RandomForestClassifier 10      0.762595     0.052691     0.603448   \n",
+       "SVC                    19      0.650130     0.065829     0.437069   \n",
+       "\n",
+       "                           auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "classifier                                                             \n",
+       "DecisionTreeClassifier 0      0.855603  0.661109  0.059722  0.475000   \n",
+       "KNeighborsClassifier   4      0.853448  0.658767  0.055262  0.492241   \n",
+       "RandomForestClassifier 10     0.928448  0.764598  0.052473  0.606034   \n",
+       "SVC                    19     0.850862  0.650833  0.065979  0.434483   \n",
+       "\n",
+       "                            auc_max       p_l       p_g    f_l    f_g  \n",
+       "classifier                                                             \n",
+       "DecisionTreeClassifier 0   0.833621  0.443313  0.556687  False  False  \n",
+       "KNeighborsClassifier   4   0.853448  0.665874  0.334126  False  False  \n",
+       "RandomForestClassifier 10  0.923276  0.002484  0.997516   True  False  \n",
+       "SVC                    19  0.850862  0.001734  0.998266   True  False  "
+      ]
+     },
+     "execution_count": 76,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged.groupby('classifier').apply(lambda pdf: pdf[pdf['auc_mean'] == pdf['auc_mean'].max()])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"{'C': 0.1, 'probability': True, 'random_state': 5}\""
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tmp = merged.groupby('classifier').apply(lambda pdf: pdf[pdf['auc_mean'] == pdf['auc_mean'].max()])\n",
+    "tmp.iloc[-1]['cparam']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>sparam</th>\n",
+       "      <th>classifier</th>\n",
+       "      <th>cparam</th>\n",
+       "      <th>auc_det</th>\n",
+       "      <th>auc</th>\n",
+       "      <th>auc_mean_det</th>\n",
+       "      <th>auc_std_det</th>\n",
+       "      <th>auc_min_det</th>\n",
+       "      <th>auc_max_det</th>\n",
+       "      <th>auc_mean</th>\n",
+       "      <th>auc_std</th>\n",
+       "      <th>auc_min</th>\n",
+       "      <th>auc_max</th>\n",
+       "      <th>p_l</th>\n",
+       "      <th>p_g</th>\n",
+       "      <th>f_l</th>\n",
+       "      <th>f_g</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>bupa</td>\n",
+       "      <td>{'n_neighbors': 5, 'proportion': 1.0, 'random_...</td>\n",
+       "      <td>RandomForestClassifier</td>\n",
+       "      <td>{'max_depth': 8, 'random_state': 5}</td>\n",
+       "      <td>[0.8543103448275862, 0.7698275862068966, 0.755...</td>\n",
+       "      <td>[0.8551724137931035, 0.7422413793103448, 0.755...</td>\n",
+       "      <td>0.762595</td>\n",
+       "      <td>0.052691</td>\n",
+       "      <td>0.603448</td>\n",
+       "      <td>0.928448</td>\n",
+       "      <td>0.764598</td>\n",
+       "      <td>0.052473</td>\n",
+       "      <td>0.606034</td>\n",
+       "      <td>0.923276</td>\n",
+       "      <td>0.002484</td>\n",
+       "      <td>0.997516</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    name                                             sparam  \\\n",
+       "10  bupa  {'n_neighbors': 5, 'proportion': 1.0, 'random_...   \n",
+       "\n",
+       "                classifier                               cparam  \\\n",
+       "10  RandomForestClassifier  {'max_depth': 8, 'random_state': 5}   \n",
+       "\n",
+       "                                              auc_det  \\\n",
+       "10  [0.8543103448275862, 0.7698275862068966, 0.755...   \n",
+       "\n",
+       "                                                  auc  auc_mean_det  \\\n",
+       "10  [0.8551724137931035, 0.7422413793103448, 0.755...      0.762595   \n",
+       "\n",
+       "    auc_std_det  auc_min_det  auc_max_det  auc_mean   auc_std   auc_min  \\\n",
+       "10     0.052691     0.603448     0.928448  0.764598  0.052473  0.606034   \n",
+       "\n",
+       "     auc_max       p_l       p_g   f_l    f_g  \n",
+       "10  0.923276  0.002484  0.997516  True  False  "
+      ]
+     },
+     "execution_count": 77,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[merged['auc_mean'] == merged['auc_mean'].max()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "auc_mean        0.610534\n",
+       "auc_std         0.052280\n",
+       "auc_mean_det    0.610252\n",
+       "auc_std_det     0.052778\n",
+       "p_l             0.405746\n",
+       "p_g             0.594254\n",
+       "f_l             0.250000\n",
+       "f_g             0.100000\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 78,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[['auc_mean', 'auc_std', 'auc_mean_det', 'auc_std_det', 'p_l', 'p_g', 'f_l', 'f_g']].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "auc_min        0.429138\n",
+       "auc_max        0.770366\n",
+       "auc_min_det    0.425280\n",
+       "auc_max_det    0.773621\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[['auc_min', 'auc_max', 'auc_min_det', 'auc_max_det']].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "auc_mean        0.630787\n",
+       "auc_std         0.057362\n",
+       "auc_mean_det    0.630810\n",
+       "auc_std_det     0.057514\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 80,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged[['auc_mean', 'auc_std', 'auc_mean_det', 'auc_std_det']].median()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smote-variants",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/development/tmp.ipynb b/notebooks/development/tmp.ipynb
new file mode 100644
index 0000000..9d6f2ba
--- /dev/null
+++ b/notebooks/development/tmp.ipynb
@@ -0,0 +1,168 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mlscorecheck.aggregated import fold_partitioning_generator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2 50\n",
+      "3 1633\n",
+      "4 3581\n",
+      "5 14090\n",
+      "6 483176\n",
+      "7 2040776\n",
+      "8 139143\n",
+      "9 4342190\n",
+      "10 246448\n",
+      "11 7138953\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[27], line 4\u001b[0m\n\u001b[1;32m      2\u001b[0m count \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m fold_partitioning_generator(p\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m, n\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m, k\u001b[38;5;241m=\u001b[39mk):\n\u001b[0;32m----> 4\u001b[0m     count \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28mprint\u001b[39m(k, count)\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "for k in range(2, 100):\n",
+    "    count = 0\n",
+    "    for _ in fold_partitioning_generator(p=100, n=100, k=k):\n",
+    "        count += 1\n",
+    "    print(k, count)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "def factorial(n):\n",
+    "    res = 1.0\n",
+    "    for idx in range(1, n+1):\n",
+    "        res*= idx\n",
+    "    return res\n",
+    "\n",
+    "def partitions(n, k):\n",
+    "    return factorial(n)/(factorial(k)**(n/k)*factorial(int(n/k)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2 3.5621427058240887e+162\n",
+      "3 3.626801540867747e+197\n",
+      "4 2.572363629860447e+206\n",
+      "5 1.2282742117030945e+205\n",
+      "6 1.5826523735488407e+202\n",
+      "7 1.1103610594874628e+198\n",
+      "8 2.7433529171014324e+193\n",
+      "9 1.9492961457735544e+188\n",
+      "10 7.063610910334588e+181\n",
+      "11 7.2886230189539895e+177\n",
+      "12 3.2146769992502e+172\n",
+      "13 1.5115610004254664e+168\n",
+      "14 4.0785530300903386e+163\n",
+      "15 2.2321627264014143e+158\n",
+      "16 2.4791858123335247e+154\n",
+      "17 1.394364796130934e+151\n",
+      "18 5.098077439183687e+146\n",
+      "19 2.4740386235690717e+143\n",
+      "20 2.3728024522013827e+139\n",
+      "21 3.451392153010874e+135\n",
+      "22 5.140441315924067e+132\n",
+      "23 1.550064934282854e+129\n",
+      "24 6.442561010058904e+125\n",
+      "25 3.601142943963805e+122\n",
+      "26 3.178184745685334e+120\n",
+      "27 3.016011757262844e+117\n",
+      "28 3.628951497682093e+114\n",
+      "29 5.449810242010832e+111\n",
+      "30 1.0070550335094754e+109\n",
+      "31 1.1302251970828393e+107\n",
+      "32 3.045675448556612e+104\n",
+      "33 9.748430425758596e+101\n",
+      "34 3.6700428573340127e+99\n",
+      "35 1.610642116094514e+97\n",
+      "36 8.172210536057132e+94\n",
+      "37 4.757683690007036e+92\n",
+      "38 1.2623592927902406e+91\n",
+      "39 9.479061849256144e+88\n",
+      "40 8.00924709229139e+86\n",
+      "41 7.572458670181864e+84\n",
+      "42 7.969747815064958e+82\n",
+      "43 9.292051051329801e+80\n",
+      "44 1.1947431387602131e+79\n",
+      "45 1.6869322862438707e+77\n",
+      "46 2.6053246405122032e+75\n",
+      "47 4.3848537730394063e+73\n",
+      "48 8.014265931916633e+71\n",
+      "49 1.585493661024941e+70\n"
+     ]
+    }
+   ],
+   "source": [
+    "p = 150\n",
+    "n = 50\n",
+    "\n",
+    "for k in range(2, min(p, n)):\n",
+    "\n",
+    "    print(k, partitions(n, k)*partitions(p,k))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smote-variants",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/smote_variants/base/_metrictensor.py b/smote_variants/base/_metrictensor.py
index 45a54a0..bddecf6 100755
--- a/smote_variants/base/_metrictensor.py
+++ b/smote_variants/base/_metrictensor.py
@@ -584,7 +584,8 @@ def tensor(self, X, y):
         if self.metric_learning_method == 'ITML':
             self.metric_tensor = self._train_metric_learning(X_mod,
                                                 y_mod,
-                                                self.metric_learning_method)
+                                                self.metric_learning_method,
+                                                random_state=5)
         elif self.metric_learning_method == 'rf':
             dissim= ClassifierImpliedDissimilarityMatrix().fit(X, y)\
                                             .dissimilarity_matrix(X)
@@ -592,7 +593,8 @@ def tensor(self, X, y):
         elif self.metric_learning_method == 'LSML':
             self.metric_tensor = self._train_metric_learning(X_mod,
                                                 y_mod,
-                                                self.metric_learning_method)
+                                                self.metric_learning_method,
+                                                random_state=5)
         elif self.metric_learning_method == 'cov':
             self.metric_tensor = np.linalg.inv(fix_pd_matrix(np.cov(X.T)))
         elif self.metric_learning_method == 'cov_min':
@@ -606,7 +608,8 @@ def tensor(self, X, y):
         elif self.metric_learning_method == 'ITML_mi':
             self.metric_tensor = self._train_metric_learning(X_mod,
                                                 y_mod,
-                                                self.metric_learning_method)
+                                                self.metric_learning_method,
+                                                random_state=5)
             mutuali= estimate_mutual_information(X, y)
             self.metric_tensor= np.matmul(self.metric_tensor, np.diag(mutuali))
         elif self.metric_learning_method == 'NCA':
@@ -616,9 +619,16 @@ def tensor(self, X, y):
             matrices = [self._train_metric_learning(X_mod,
                                                 y_mod,
                                                 self.metric_learning_method,
+                                                random_state=5,
                                                 prior='random') for i in range(2)]
 
             self.metric_tensor= psd_mean(matrices)
+        elif self.metric_learning_method == 'n_unique':
+            n_uniques = np.array([len(np.unique(X_mod[:, idx])) for idx in range(X.shape[1])])
+            self.metric_tensor = np.diag(np.sqrt(n_uniques))
+        elif self.metric_learning_method == 'n_unique_inv':
+            n_uniques = np.array([len(np.unique(X_mod[:, idx])) for idx in range(X.shape[1])])
+            self.metric_tensor = np.diag(np.sqrt(1.0/n_uniques))
 
         return self.metric_tensor
 
diff --git a/smote_variants/base/_simplexsampling.py b/smote_variants/base/_simplexsampling.py
index 6d04d3b..f33df7c 100644
--- a/smote_variants/base/_simplexsampling.py
+++ b/smote_variants/base/_simplexsampling.py
@@ -400,6 +400,8 @@ def determine_simplex_distribution(self, X, simplices):
             return np.repeat(1.0/len(simplices), len(simplices))
         if self.simplex_sampling == 'volume':
             return simplex_volumes(X[simplices])
+        if self.simplex_sampling == 'volume_inv':
+            return 1.0 / (simplex_volumes(X[simplices]) + 0.001)
         raise ValueError(f"simplex sampling with weighting "\
                             f"{self.simplex_sampling} not implemented yet")
 
@@ -541,8 +543,13 @@ def add_gaussian_noise(self, samples):
         """
 
         if 'sigma' in self.gaussian_component:
-            sigma = self.gaussian_component['sigma']
-            return samples + self.random_state.normal(size=samples.shape) * sigma
+            if 'fraction' not in self.gaussian_component:
+                sigma = self.gaussian_component['sigma']
+                return samples + self.random_state.normal(size=samples.shape) * sigma
+            else:
+                sigma = self.gaussian_component['sigma']
+                fraction = self.gaussian_component['fraction']
+                return samples + self.random_state.normal(size=samples.shape) * sigma * self.random_state.choice([0, 1], p=[1.0 - fraction, fraction], size=samples.shape)
         if 'sigmas' in self.gaussian_component:
             sigmas = self.gaussian_component['sigmas']
             return samples + self.random_state.normal(size=samples.shape) * sigmas
diff --git a/smote_variants/oversampling/_adasyn.py b/smote_variants/oversampling/_adasyn.py
index c3f4758..95da8ff 100755
--- a/smote_variants/oversampling/_adasyn.py
+++ b/smote_variants/oversampling/_adasyn.py
@@ -95,7 +95,7 @@ def parameter_combinations(cls, raw=False):
         Returns:
             list(dict): a list of meaningful parameter combinations
         """
-        parameter_combinations = {'n_neighbors': [3, 5, 7, 9],
+        parameter_combinations = {'n_neighbors': [3, 5, 7, 9, 11, 17],
                                   'd_th': [0.9],
                                   'proportion': [2.0, 1.5, 1.0, 0.75, 0.5, 0.25]}
         return cls.generate_parameter_combinations(parameter_combinations, raw)
diff --git a/smote_variants/oversampling/_borderline_smote.py b/smote_variants/oversampling/_borderline_smote.py
index 9ae4054..82ec9c1 100755
--- a/smote_variants/oversampling/_borderline_smote.py
+++ b/smote_variants/oversampling/_borderline_smote.py
@@ -145,8 +145,8 @@ def parameter_combinations(cls, raw=False):
         """
         parameter_combinations = {'proportion': [0.1, 0.25, 0.5, 0.75,
                                                  1.0, 1.5, 2.0],
-                                  'n_neighbors': [3, 5, 7],
-                                  'k_neighbors': [3, 5, 7]}
+                                  'n_neighbors': [3, 5, 7, 11, 17],
+                                  'k_neighbors': [3, 5, 7, 11, 17]}
 
         return cls.generate_parameter_combinations(parameter_combinations, raw)
 
diff --git a/smote_variants/oversampling/_lee.py b/smote_variants/oversampling/_lee.py
index e655e4c..6a48f49 100755
--- a/smote_variants/oversampling/_lee.py
+++ b/smote_variants/oversampling/_lee.py
@@ -113,7 +113,7 @@ def parameter_combinations(cls, raw=False):
         """
         parameter_combinations = {'proportion': [0.1, 0.25, 0.5, 0.75,
                                                  1.0, 1.5, 2.0],
-                                  'n_neighbors': [3, 5, 7],
+                                  'n_neighbors': [3, 5, 7, 11, 17],
                                   'rejection_level': [0.3, 0.5, 0.7]}
         return cls.generate_parameter_combinations(parameter_combinations, raw)
 
diff --git a/smote_variants/oversampling/_prowsyn.py b/smote_variants/oversampling/_prowsyn.py
index bb24c14..4d61914 100755
--- a/smote_variants/oversampling/_prowsyn.py
+++ b/smote_variants/oversampling/_prowsyn.py
@@ -109,7 +109,7 @@ def parameter_combinations(cls, raw=False):
         """
         parameter_combinations = {'proportion': [0.1, 0.25, 0.5, 0.75,
                                                  1.0, 1.5, 2.0],
-                                  'n_neighbors': [3, 5, 7],
+                                  'n_neighbors': [3, 5, 7, 11, 17],
                                   'L': [3, 5, 7],
                                   'theta': [0.1, 1.0, 2.0]}
         return cls.generate_parameter_combinations(parameter_combinations, raw)
diff --git a/smote_variants/oversampling/_smote.py b/smote_variants/oversampling/_smote.py
index aa237b0..b3e9843 100755
--- a/smote_variants/oversampling/_smote.py
+++ b/smote_variants/oversampling/_smote.py
@@ -87,7 +87,7 @@ def parameter_combinations(cls, raw=False):
         """
         parameter_combinations = {'proportion': [0.1, 0.25, 0.5, 0.75,
                                                  1.0, 1.5, 2.0],
-                                  'n_neighbors': [3, 5, 7]}
+                                  'n_neighbors': [3, 5, 7, 11, 17]}
 
         return cls.generate_parameter_combinations(parameter_combinations, raw)
 
diff --git a/smote_variants/oversampling/_smote_ipf.py b/smote_variants/oversampling/_smote_ipf.py
index bedd0c0..485f2e9 100755
--- a/smote_variants/oversampling/_smote_ipf.py
+++ b/smote_variants/oversampling/_smote_ipf.py
@@ -122,7 +122,7 @@ def parameter_combinations(cls, raw=False):
         classifiers = [('sklearn.tree', 'DecisionTreeClassifier', {'random_state': 2})]
         parameter_combinations = {'proportion': [0.1, 0.25, 0.5, 0.75,
                                                  1.0, 1.5, 2.0],
-                                  'n_neighbors': [3, 5, 7],
+                                  'n_neighbors': [3, 5, 7, 11, 17],
                                   'n_folds': [9],
                                   'k': [3],
                                   'p': [0.01],