Commit
Added utility functions to the docs
prithagupta committed Aug 16, 2024
1 parent 26e87c7 commit 4e16003
Showing 5 changed files with 158 additions and 23 deletions.
1 change: 1 addition & 0 deletions docs/source/index.rst
@@ -22,6 +22,7 @@ Contents

notebooks/comparing_mi_estimators
notebooks/automated_information_leakage_detection
+ notebooks/utils

.. toctree::
:maxdepth: 2
19 changes: 10 additions & 9 deletions docs/source/notebooks/automated_information_leakage_detection.ipynb
@@ -17,10 +17,13 @@
},
{
"cell_type": "code",
- "execution_count": 1,
"id": "63a2d792-bd63-4f48-b64c-5bdcf0d9af40",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+  "ExecuteTime": {
+   "end_time": "2024-08-16T19:46:29.050721Z",
+   "start_time": "2024-08-16T19:46:29.033296Z"
+  }
+ },
"source": [
"import logging\n",
"import warnings\n",
@@ -31,7 +34,9 @@
"logging.getLogger(\"pytorch\").setLevel(logging.ERROR)\n",
"logging.getLogger(\"torch\").setLevel(logging.ERROR)\n",
"logging.getLogger(\"urllib3.connectionpool\").setLevel(logging.ERROR)"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 1
},
{
"cell_type": "code",
@@ -52,13 +57,9 @@
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split\n",
"from autoqild.detectors import TabPFNLeakageDetector, RandomForestLeakageDetector\n",
"from autoqild.dataset_readers import SyntheticDatasetGeneratorDistance\n",
"from utils import setup_logging, setup_random_seed, create_search_space\n",
"import pandoc"
"from utils import setup_logging, setup_random_seed, create_search_space"
]
},
{
7 changes: 2 additions & 5 deletions docs/source/notebooks/comparing_mi_estimators.ipynb
@@ -39,15 +39,12 @@
"outputs": [],
"source": [
"# Cell 1: Setup and Imports\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from autoqild.dataset_readers import SyntheticDatasetGenerator\n",
"from autoqild.mi_estimators.mi_estimator_classification import ClassficationMIEstimator\n",
"from autoqild.mi_estimators import MineMIEstimatorMSE, GMMMIEstimator, TabPFNMIEstimator\n",
"from autoqild.utilities import print_dictionary\n",
"from utils import setup_logging, setup_random_seed\n",
"from sklearn.model_selection import train_test_split\n",
"import pandoc"
"from sklearn.model_selection import train_test_split"
]
},
{
@@ -57,7 +54,7 @@
"source": [
"**Setting Up Experiment Logging and Random Seed:**\n",
"\n",
"Initialize logging for the experiment, recording all key events in info_leakage_detection.log for tracking and debugging."
"Initialize logging for the experiment, recording all key events in info_leakage_detection.log for tracking and debugging. Using the utils.py file."
]
},
{
9 changes: 0 additions & 9 deletions docs/source/notebooks/utils.py
@@ -12,8 +12,6 @@
from sklearn.utils import check_random_state
from skopt.space import Real, Categorical, Integer

-from autoqild import *
-

def create_search_space(hp_ranges, logger):
    def isint(v):
@@ -45,13 +43,6 @@ def isstr(v):
    return search_space


-def convert_learner_params(params):
-    for key, value in params.items():
-        if value == "None":
-            params[key] = None
-    return params
-
-
def setup_logging(log_path=None, level=logging.INFO):
"""Function setup as many logging for the experiments."""
if log_path is None:
Expand Down
145 changes: 145 additions & 0 deletions docs/source/notebooks/utils.rst
@@ -0,0 +1,145 @@
Utilities Module
================

This module contains utility functions for logging setup, search space creation, and random seed setup, designed for compatibility with TensorFlow, PyTorch, and scikit-learn.
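
The notebooks in this documentation import these helpers directly from ``utils.py``:

.. code-block:: python

   from utils import setup_logging, setup_random_seed, create_search_space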

Functions
---------

create_search_space
~~~~~~~~~~~~~~~~~~~

This function creates a hyperparameter search space based on provided ranges, supporting integers, floats, booleans, and strings.

**Code:**

.. code-block:: python

   import inspect
   import logging
   import multiprocessing
   import os
   import random

   import numpy as np
   import sklearn
   import tensorflow as tf
   import torch
   from packaging import version
   from sklearn.utils import check_random_state
   from skopt.space import Real, Categorical, Integer


   def create_search_space(hp_ranges, logger):
       def isint(v):
           return type(v) is int

       def isfloat(v):
           return type(v) is float

       def isbool(v):
           return type(v) is bool

       def isstr(v):
           return type(v) is str

       search_space = {}
       for key, value in hp_ranges.items():
           logger.info(f"Before key {key} value {value}")
           if version.parse(sklearn.__version__) < version.parse("0.25.0"):
               if key == "criterion" and "squared_error" in value:
                   value = ["friedman_mse", "mse"]
           if isint(value[0]) and isint(value[1]):
               search_space[key] = Integer(value[0], value[1])
           if isfloat(value[0]) and isfloat(value[1]):
               if len(value) == 3:
                   search_space[key] = Real(value[0], value[1], prior=value[2])
           if (isbool(value[0]) and isbool(value[1])) or (isstr(value[0]) and isstr(value[1])):
               search_space[key] = Categorical(value)
           logger.info(f"key {key} value {value}")
       return search_space
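
For illustration, a minimal usage sketch; the ``hp_ranges`` keys and bounds below are hypothetical examples, not values taken from the package:

.. code-block:: python

   logger = logging.getLogger("SearchSpaceExample")
   hp_ranges = {
       "n_estimators": [50, 500],                     # two ints -> Integer dimension
       "learning_rate": [1e-4, 1e-1, "log-uniform"],  # two floats + prior -> Real dimension
       "bootstrap": [True, False],                    # booleans -> Categorical dimension
   }
   search_space = create_search_space(hp_ranges, logger)
   # each value becomes a skopt dimension (Integer / Real / Categorical)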

setup_logging
~~~~~~~~~~~~~

Sets up logging for experiments, allowing control over log file location and verbosity.

**Code:**

.. code-block:: python

   def setup_logging(log_path=None, level=logging.INFO):
       """Set up logging for the experiments."""
       if log_path is None:
           dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
           dirname = os.path.dirname(dirname)
           log_path = os.path.join(dirname, "logs", "logs.log")
       logging.basicConfig(
           filename=log_path,
           level=level,
           format="%(asctime)s %(name)s %(levelname)-8s %(message)s",
           datefmt="%Y-%m-%d %H:%M:%S",
           force=True,
       )
       logger = logging.getLogger("SetupLogging")  # named logger for setup messages
       logger.info("log file path: {}".format(log_path))
       os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # suppress TensorFlow C++ INFO, WARNING, and ERROR logs
       # Disable GPU usage explicitly for TensorFlow
       tf.config.set_visible_devices([], "GPU")
       logging.captureWarnings(False)
       import warnings

       warnings.filterwarnings("ignore")
       warnings.filterwarnings("ignore", category=DeprecationWarning)
       logging.getLogger("matplotlib").setLevel(logging.ERROR)
       logging.getLogger("tensorflow").setLevel(logging.ERROR)
       logging.getLogger("pytorch").setLevel(logging.ERROR)
       logging.getLogger("torch").setLevel(logging.ERROR)
       logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

setup_random_seed
~~~~~~~~~~~~~~~~~

Sets up a random seed across TensorFlow, PyTorch, NumPy, and Python's ``random`` module, while also configuring CPU and GPU usage.

**Code:**

.. code-block:: python

   def setup_random_seed(random_state=1234):
       logger = logging.getLogger("Setup Logging")
       random_state = check_random_state(random_state)
       seed = random_state.randint(2**31, dtype="uint32")
       torch.manual_seed(seed)
       logger.info(f"Total number of torch threads {torch.get_num_threads()}")
       if torch.get_num_threads() <= 2:
           n_cpus = 1
       else:
           n_cpus = torch.get_num_threads() - 2
       if "pc2" in os.environ["HOME"]:
           n_cpus = 4
       logger.info(f"Torch threads set {n_cpus}")
       torch.set_num_threads(n_cpus)
       tf.random.set_seed(seed)
       seed = random_state.randint(2**31, dtype="uint32")
       np.random.seed(seed)
       random.seed(seed)
       os.environ["KERAS_BACKEND"] = "tensorflow"
       devices = tf.config.list_physical_devices("GPU")
       logger.info("Keras Devices {}".format(devices))
       n_gpus = len(devices)
       logger.info("Keras GPU {}".format(n_gpus))
       if n_gpus == 0:
           cpu_count = multiprocessing.cpu_count()
           tf.config.threading.set_inter_op_parallelism_threads(1)
           tf.config.threading.set_intra_op_parallelism_threads(1)
           if cpu_count > 2:
               pass
       else:
           for gpu in tf.config.list_physical_devices("GPU"):
               tf.config.experimental.set_memory_growth(gpu, True)
       torch_gpu = torch.device("cuda" if torch.cuda.is_available() else "cpu")
       logger.info("Torch GPU device {}".format(torch_gpu))
