securefederatedai · psfoley · May 23, 2023 · Apr 27, 2023 · Apr 28, 2023 · Apr 28, 2023
diff --git a/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/README.md b/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/README.md
@@ -0,0 +1,71 @@
+# MedMNIST 2D Classification Using FedProx Optimizer Tutorial
+
+![MedMNISTv2_overview](https://raw.githubusercontent.com/MedMNIST/MedMNIST/main/assets/medmnistv2.jpg)
+
+For more details, please refer to the original paper:
+**MedMNIST v2: A Large-Scale Lightweight Benchmark for 2D and 3D Biomedical Image Classification** ([arXiv](https://arxiv.org/abs/2110.14795)), and [PyPI](https://pypi.org/project/medmnist/).
+
+This example differs from PyTorch_MedMNIST_2D in that it uses the FedProx Optimizer. For more information on FedProx see:
+**Federated Optimization in Heterogeneous Networks** ([arXiv](https://arxiv.org/abs/1812.06127)).
+
+## I. About model and experiments
+
+We use a simple convolutional neural network and settings coming from [the experiments](https://github.com/MedMNIST/experiments) repository.
+<br/>
+
+## II. How to run this tutorial (without TLS and locally as a simulation):
+### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip:
+  - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/install.html#install-the-package).
+<br/>
+
+### 1. Split terminal into 3 (1 terminal for the director, 1 for the envoy, and 1 for the experiment)
+<br/>
+
+### 2. Do the following in each terminal:
+   - Activate the virtual environment from step 0:
+
+   ```sh
+   source venv/bin/activate
+   ```
+   - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals.
+   - Navigate to the tutorial:
+
+   ```sh
+   cd openfl/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST
+   ```
+<br/>
+
+### 3. In the first terminal, run the director:
+
+```sh
+cd director
+./start_director.sh
+```
+<br/>
+
+### 4. In the second terminal, install requirements and run the envoy:
+
+```sh
+cd envoy
+pip install -r requirements.txt
+./start_envoy.sh env_one envoy_config.yaml
+```
+
+Optional: Run a second envoy in an additional terminal:
+  - Ensure step 2 is complete for this terminal as well.
+  - Run the second envoy:
+```sh
+cd envoy
+./start_envoy.sh env_two envoy_config.yaml
+```
+<br/>
+
+### 5. In the third terminal (or fourth terminal, if you chose to run two envoys), run the Jupyter Notebook:
+
+```sh
+cd workspace
+jupyter lab Pytorch_FedProx_MedMNIST_2D.ipynb
+```
+- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the Pytorch_FedProx_MedMNIST_2D.ipynb file.
+- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells".
+- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment finished successfully.
diff --git a/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/director/director_config.yaml b/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/director/director_config.yaml
@@ -0,0 +1,6 @@
+settings:
+  listen_host: localhost
+  listen_port: 50051
+  sample_shape: ['28', '28', '3']
+  target_shape: ['1','1']
+
diff --git a/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/director/start_director.sh b/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/director/start_director.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+# Start the OpenFL director with TLS disabled, configured by director_config.yaml.
+fx director start --disable-tls -c director_config.yaml
diff --git a/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/envoy/envoy_config.yaml b/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/envoy/envoy_config.yaml
@@ -0,0 +1,11 @@
+params:
+  cuda_devices: []
+
+optional_plugin_components: {}
+
+shard_descriptor:
+  template: medmnist_shard_descriptor.MedMNISTShardDescriptor
+  params:
+    rank_worldsize: 1, 1  # "<rank>, <worldsize>"; split on ',' by the shard descriptor
+    datapath: data/.
+    dataname: bloodmnist  # key into medmnist INFO; '.npz' is appended for the file name
diff --git a/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/envoy/medmnist_shard_descriptor.py b/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/envoy/medmnist_shard_descriptor.py
@@ -0,0 +1,129 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""MedMNIST Shard Descriptor."""
+
+import logging
+import os
+from typing import Any, List, Optional, Tuple
+
+from medmnist.info import INFO, HOMEPAGE
+import numpy as np
+
+from openfl.interface.interactive_api.shard_descriptor import ShardDataset
+from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor
+
+logger = logging.getLogger(__name__)
+
+
+class MedMNISTShardDataset(ShardDataset):
+    """MedMNIST Shard dataset class."""
+
+    def __init__(self, x, y, data_type: str = 'train', rank: int = 1, worldsize: int = 1) -> None:
+        """Initialize MedMNISTDataset."""
+        self.data_type = data_type
+        self.rank = rank
+        self.worldsize = worldsize
+        self.x = x[self.rank - 1::self.worldsize]
+        self.y = y[self.rank - 1::self.worldsize]
+
+    def __getitem__(self, index: int) -> Tuple[Any, Any]:
+        """Return an item by the index."""
+        return self.x[index], self.y[index]
+
+    def __len__(self) -> int:
+        """Return the len of the dataset."""
+        return len(self.x)
+
+
+class MedMNISTShardDescriptor(ShardDescriptor):
+    """MedMNIST Shard descriptor class."""
+
+    def __init__(
+            self,
+            rank_worldsize: str = '1, 1',
+            datapath: str = '',
+            dataname: str = 'bloodmnist',
+            **kwargs
+    ) -> None:
+        """Initialize MedMNISTShardDescriptor."""
+        self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(','))
+
+        self.datapath = datapath
+        self.dataset_name = dataname
+        self.info = INFO[self.dataset_name]
+
+        (x_train, y_train), (x_test, y_test) = self.load_data()
+        self.data_by_type = {
+            'train': (x_train, y_train),
+            'val': (x_test, y_test)
+        }
+
+    def get_shard_dataset_types(self) -> List[str]:
+        """Get available shard dataset types."""
+        return list(self.data_by_type)
+
+    def get_dataset(self, dataset_type='train') -> MedMNISTShardDataset:
+        """Return a shard dataset by type."""
+        if dataset_type not in self.data_by_type:
+            raise Exception(f'Wrong dataset type: {dataset_type}')
+        return MedMNISTShardDataset(
+            *self.data_by_type[dataset_type],
+            data_type=dataset_type,
+            rank=self.rank,
+            worldsize=self.worldsize
+        )
+
+    @property
+    def sample_shape(self) -> List[str]:
+        """Return the sample shape info."""
+        return ['28', '28', '3']
+
+    @property
+    def target_shape(self) -> List[str]:
+        """Return the target shape info."""
+        return ['1', '1']
+
+    @property
+    def dataset_description(self) -> str:
+        """Return the dataset description."""
+        return (f'MedMNIST dataset, shard number {self.rank}'
+                f' out of {self.worldsize}')
+
+    @staticmethod
+    def download_data(datapath: str = 'data/',
+                      dataname: str = 'bloodmnist',
+                      info: dict = {}) -> None:
+
+        logger.info(f"{datapath}\n{dataname}\n{info}")
+        try:
+            from torchvision.datasets.utils import download_url
+            download_url(url=info["url"],
+                         root=datapath,
+                         filename=dataname,
+                         md5=info["MD5"])
+        except Exception:
+            raise RuntimeError('Something went wrong when downloading! '
+                               + 'Go to the homepage to download manually. '
+                               + HOMEPAGE)
+
+    def load_data(self) -> Tuple[Tuple[Any, Any], Tuple[Any, Any]]:
+        """Download prepared dataset."""
+
+        dataname = self.dataset_name + '.npz'
+        dataset = os.path.join(self.datapath, dataname)
+
+        if not os.path.isfile(dataset):
+            logger.info(f"Dataset {dataname} not found at:{self.datapath}.\n\tDownloading...")
+            MedMNISTShardDescriptor.download_data(self.datapath, dataname, self.info)
+            logger.info("DONE!")
+
+        data = np.load(dataset)
+
+        x_train = data["train_images"]
+        x_test = data["test_images"]
+
+        y_train = data["train_labels"]
+        y_test = data["test_labels"]
+        logger.info('MedMNIST data was loaded!')
+        return (x_train, y_train), (x_test, y_test)
diff --git a/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/envoy/requirements.txt b/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/envoy/requirements.txt
@@ -0,0 +1,3 @@
+medmnist
+setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
+wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/envoy/start_envoy.sh b/openfl-tutorials/interactive_api/PyTorch_FedProx_MNIST/envoy/start_envoy.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+ENVOY_NAME=${1:?usage: start_envoy.sh ENVOY_NAME ENVOY_CONFIG_PATH}
+ENVOY_CONF=${2:?usage: start_envoy.sh ENVOY_NAME ENVOY_CONFIG_PATH}
+# Start the envoy against the director at localhost:50051 with TLS disabled.
+fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50051