Merge pull request #141 from avolkov1/cusignal_example

Cusignal nodes for Greenflow.
NVIDIA · Sep 26, 2021 · 95d8f56 · 95d8f56
2 parents e5bfa2a + 0a596d3
commit 95d8f56
Show file tree

Hide file tree

Showing 42 changed files with 4,166 additions and 154 deletions.
diff --git a/gQuant/docker/build.sh b/gQuant/docker/build.sh
@@ -19,19 +19,14 @@ D_FILE=${D_FILE:='Dockerfile.dev'}
 echo "Building greenflow container..."
 
 echo -e "\nPlease, select your operating system:\n" \
-    "- '1' for Ubuntu 16.04\n" \
-    "- '2' for Ubuntu 18.04\n" \
-    "- '3' for Ubuntu 20.04\n"
+    "- '1' for Ubuntu 18.04\n" \
+    "- '2' for Ubuntu 20.04\n"
 
-read -p "Enter your option and hit return [1]-3: " OPERATING_SYSTEM
+read -p "Enter your option and hit return [1]-2: " OPERATING_SYSTEM
 
 OPERATING_SYSTEM=${OPERATING_SYSTEM:-1}
 case $OPERATING_SYSTEM in
     1)
-        echo "Ubuntu 16.04 selected."
-        OS_STR="ubuntu16.04"
-        ;;
-    2)
         echo "Ubuntu 18.04 selected."
         OS_STR="ubuntu18.04"
         ;;
@@ -42,35 +37,27 @@ case $OPERATING_SYSTEM in
 esac
 
 echo -e "\nPlease, select your CUDA version:\n" \
-    "- '1' for cuda 10.0\n" \
-    "- '2' for cuda 10.1\n" \
-    "- '3' for cuda 10.2\n" \
-    "- '4' for cuda 11.0 (minimum requirement for Ubuntu 20.04)\n"
+    "- '1' for cuda 11.0\n" \
+    "- '2' for cuda 11.2.2\n"
 
-read -p "Enter your option and hit return [1]-3: " CUDA_VERSION
-
-RAPIDS_VERSION="0.19.0"
+read -p "Enter your option and hit return [1]-2: " CUDA_VERSION
 
 CUDA_VERSION=${CUDA_VERSION:-1}
 case $CUDA_VERSION in
     2)
-        echo "CUDA 10.1 is selected"
-        CUDA_STR="10.1"
-        ;;
-    3)
-        echo "CUDA 10.2 is selected"
-        CUDA_STR="10.2"
+        echo "CUDA 11.2.2 is selected"
+        CUDA_STR="11.2.2"
         ;;
-    4)
+    *)
         echo "CUDA 11.0 is selected"
         CUDA_STR="11.0"
         ;;
-    *)
-        echo "CUDA 10.0 is selected"
-        CUDA_STR="10.0"
-        ;;
 esac
 
+RAPIDS_CUDA_VER=$(echo ${CUDA_STR} | sed -E 's/([0-9]+\.[0-9]{1,1})[^ ]*/\1/g')
+
+RAPIDS_VERSION="21.06"
+
 mkdir -p ${BUILDDIR}
 cp -r ${GREENFLOWDIR} ${BUILDDIR}
 rsync -av --progress ${GREENFLOWLABDIR} ${BUILDDIR} --exclude node_modules 
@@ -95,6 +82,22 @@ rsync -av --progress "${PLUGINSDIR}/dask_plugin" "${BUILDDIR}/plugins" \
   --exclude dask-worker-space \
   --exclude __pycache__
 
+rsync -av --progress "${PLUGINSDIR}/hrp_plugin" "${BUILDDIR}/plugins" \
+  --exclude data \
+  --exclude .cache \
+  --exclude many-small \
+  --exclude storage \
+  --exclude dask-worker-space \
+  --exclude __pycache__
+
+rsync -av --progress "${PLUGINSDIR}/cusignal_plugin" "${BUILDDIR}/plugins" \
+  --exclude data \
+  --exclude .cache \
+  --exclude many-small \
+  --exclude storage \
+  --exclude dask-worker-space \
+  --exclude __pycache__
+
 rsync -av --progress "${PLUGINSDIR}/simple_example" "${BUILDDIR}/plugins" \
   --exclude data \
   --exclude .cache \
@@ -136,19 +139,23 @@ RUN cd /home/quant/greenflow && pip install .
 
 ## install greenflowlab extension
 ADD --chown=$USERID:$USERGID ./build/greenflowlab /home/quant/greenflowlab
-RUN cd /home/quant/greenflowlab && pip install .
+RUN cd /home/quant/greenflowlab && pip install . && \
+    jlpm cache clean && jupyter lab clean
 
 RUN jupyter lab build
 
 ## install greenflow plugins
 ADD --chown=$USERID:$USERGID ./build/plugins /home/quant/plugins
 RUN cd /home/quant/plugins/gquant_plugin && pip install .
 RUN cd /home/quant/plugins/dask_plugin && pip install .
+RUN cd /home/quant/plugins/hrp_plugin && pip install .
+RUN cd /home/quant/plugins/cusignal_plugin && pip install .
 
 WORKDIR /home/quant/plugins/gquant_plugin
 ENTRYPOINT MODULEPATH=\$HOME/plugins/gquant_plugin/modules jupyter-lab \
   --allow-root --ip=0.0.0.0 --no-browser --NotebookApp.token='' \
-  --ContentsManager.allow_hidden=True
+  --ContentsManager.allow_hidden=True \
+  --ResourceUseDisplay.track_cpu_percent=True \
 
 EOM
     MODE_STR="prod"
@@ -157,22 +164,26 @@ esac
 
 greenflow_ver=$(grep version "${GREENFLOWDIR}/setup.py" | sed "s/^.*version='\([^;]*\)'.*/\1/")
 CONTAINER="nvidia/cuda:${CUDA_STR}-runtime-${OS_STR}"
-D_CONT=${D_CONT:="greenflow/greenflow:${greenflow_ver}-Cuda${CUDA_STR}_${OS_STR}_Rapids${RAPIDS_VERSION}_${MODE_STR}"}
+D_CONT=${D_CONT:="greenflow/greenflow:${greenflow_ver}-Cuda${RAPIDS_CUDA_VER}_${OS_STR}_Rapids${RAPIDS_VERSION}_${MODE_STR}"}
 
 
 pushd ${_basedir}
 
 cat > $D_FILE <<EOF
 FROM $CONTAINER
+
 EXPOSE 8888
 EXPOSE 8787
 EXPOSE 8786
+
+ENV DEBIAN_FRONTEND=noninteractive
+
 RUN apt-get update && \
     apt-get install -y --no-install-recommends software-properties-common && \
     add-apt-repository universe && apt-get update && \
     apt-get install -y --no-install-recommends \
-        curl git net-tools iproute2 vim wget locales-all build-essential \
-        libfontconfig1 libxrender1 rsync libsndfile1 ffmpeg && \
+        curl git less net-tools iproute2 vim wget locales-all build-essential \
+        apt-utils sshfs libfontconfig1 libxrender1 rsync libsndfile1 ffmpeg && \
     rm -rf /var/lib/apt/lists/*
 
 RUN mkdir /.local /.jupyter /.config /.cupy \
@@ -203,14 +214,15 @@ RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh &
     bash Miniconda3-latest-Linux-x86_64.sh -b && \
     rm -f Miniconda3-latest-Linux-x86_64.sh && \
     conda init && \
-    pip config set global.cache-dir false
+    pip config set global.cache-dir false && \
+    conda install -y mamba -n base -c conda-forge
 
-RUN conda install -y -c rapidsai -c nvidia -c conda-forge -c defaults \
-      rapids=$RAPIDS_VERSION cudatoolkit=$CUDA_STR python=3.8 && \
+RUN mamba install -y -c rapidsai -c nvidia -c conda-forge -c defaults \
+      rapids=$RAPIDS_VERSION cudatoolkit=$RAPIDS_CUDA_VER python=3.8 && \
     conda clean --all -y
 
-RUN conda install -y -c conda-forge -c defaults \
-      jupyterlab'>=3.0.0' jupyter-packaging'>=0.9.2' \
+RUN mamba install -y -c conda-forge -c defaults \
+      jupyterlab'>=3.0.0' jupyter-packaging'>=0.9.2' jupyterlab-system-monitor \
       nodejs=12.4.0 python-graphviz pydot ruamel.yaml && \
     conda clean --all -y && \
     jlpm cache clean && \
@@ -221,9 +233,9 @@ RUN pip install bqplot==0.12.21 && \
     jupyter lab clean
 
 ## install the nvdashboard
-# RUN pip install jupyterlab-nvdashboard
+# pip install git+https://github.com/rapidsai/jupyterlab-nvdashboard.git@branch-0.6
 RUN pip install --upgrade pip && \
-    pip install git+https://github.com/rapidsai/jupyterlab-nvdashboard.git@branch-0.6 && \
+    pip install jupyterlab-nvdashboard && \
     jlpm cache clean && \
     jupyter lab clean
 

diff --git a/gQuant/plugins/cusignal_plugin/README.md b/gQuant/plugins/cusignal_plugin/README.md
@@ -0,0 +1,37 @@
+## Greenflow Cusignal Plugin 
+
+Greenflow plugin that provides a set of nodes for the cuSignal library.
+
+
+### Install the greenflowlab JupyterLab plugin
+
+First create a Python environment, or use an existing one that has the RAPIDS
+cuSignal library. Tip: use mamba to resolve dependencies more quickly.
+```bash
+conda create -n rapids_cusignal -c conda-forge mamba python=3.8
+
+conda activate rapids_cusignal
+
+mamba install -c rapidsai -c nvidia -c conda-forge \
+    cusignal=21.06 python=3.8 cudatoolkit=11.2
+```
+
+Then install the `greenflowlab` JupyterLab plugin. Make sure `nodejs` of version
+[12^14^15] is installed. E.g.:
+```bash
+mamba install -c conda-forge python-graphviz nodejs=12.4.0 pydot
+```
+Then install the `greenflowlab`:
+```bash
+pip install greenflowlab
+```
+Or install `greenflowlab` from within the greenflowlab directory:
+```bash
+pip install .
+```
+
+### Install the cusignal plugin
+Install the plugin directly from the plugin directory.
+```bash
+pip install .
+```
diff --git a/gQuant/plugins/cusignal_plugin/greenflow_cusignal_plugin/__init__.py b/gQuant/plugins/cusignal_plugin/greenflow_cusignal_plugin/__init__.py
diff --git a/gQuant/plugins/cusignal_plugin/greenflow_cusignal_plugin/convolution/__init__.py b/gQuant/plugins/cusignal_plugin/greenflow_cusignal_plugin/convolution/__init__.py
@@ -0,0 +1,5 @@
+from .convolve import *
+from .correlate import *
+from .fftconvolve import *
+from .convolve2d import *
+from .correlate2d import *
diff --git a/gQuant/plugins/cusignal_plugin/greenflow_cusignal_plugin/convolution/convolve.py b/gQuant/plugins/cusignal_plugin/greenflow_cusignal_plugin/convolution/convolve.py
@@ -0,0 +1,129 @@
+import numpy as np
+import cupy as cp
+
+from cusignal.convolution import convolve as cuconv
+from scipy.signal import convolve as siconv
+
+from greenflow.dataframe_flow import (Node, PortsSpecSchema, ConfSchema)
+from greenflow.dataframe_flow.template_node_mixin import TemplateNodeMixin
+
+__all__ = ['CusignalConvolveNode']
+
+_CONV_DESC = '''Convolve two N-dimensional arrays.
+
+Convolve `in1` and `in2`, with the output size determined by the
+`mode` argument.
+
+Returns:
+convolve : array
+    An N-dimensional array containing a subset of the discrete linear
+    convolution of `in1` with `in2`.
+'''
+
+_CONV_MODE_DESC = '''mode : str {'full', 'valid', 'same'}, optional
+A string indicating the size of the output:
+
+    ``full``
+       The output is the full discrete linear convolution
+       of the inputs. (Default)
+    ``valid``
+       The output consists only of those elements that do not
+       rely on the zero-padding. In 'valid' mode, either `in1` or `in2`
+       must be at least as large as the other in every dimension.
+    ``same``
+       The output is the same size as `in1`, centered
+       with respect to the 'full' output.
+'''
+
+_CONV_METHOD_DESC = '''method : str {'auto', 'direct', 'fft'}, optional
+A string indicating which method to use to calculate the convolution.
+
+    ``direct``
+       The convolution is determined directly from sums, the definition of
+       convolution.
+    ``fft``
+       The Fourier Transform is used to perform the convolution by calling
+       `fftconvolve`.
+    ``auto``
+       Automatically chooses direct or Fourier method based on an estimate
+       of which is faster (default).
+'''
+
+
+class CusignalConvolveNode(TemplateNodeMixin, Node):
+    """Greenflow node that convolves two arrays.
+
+    The convolution runs through ``cusignal.convolution.convolve`` by
+    default, or through ``scipy.signal.convolve`` when the ``use_cpu``
+    configuration option is set. The result can optionally be divided by
+    the sum of the second input (``normalize``).
+    """
+
+    def init(self):
+        """Declare the node's ports and output metadata.
+
+        Registers two input ports (``in1``, ``in2``) and one output port
+        (``convolve``), each typed as either a cupy or a numpy ndarray, and
+        registers an empty metadata entry for the output port.
+        """
+        TemplateNodeMixin.init(self)
+
+        port_type = PortsSpecSchema.port_type
+        inports = {
+            'in1': {port_type: [cp.ndarray, np.ndarray]},
+            'in2': {port_type: [cp.ndarray, np.ndarray]}
+        }
+        outports = {
+            'convolve': {port_type: [cp.ndarray, np.ndarray]},
+        }
+        self.template_ports_setup(in_ports=inports, out_ports=outports)
+
+        meta_outports = {'convolve': {}}
+        self.template_meta_setup(out_ports=meta_outports)
+
+    def conf_schema(self):
+        """Return the configuration schema for this node.
+
+        The schema exposes four properties: ``mode`` (default ``'full'``),
+        ``method`` (default ``'auto'``), ``normalize`` (default ``False``)
+        and ``use_cpu`` (default ``False``).
+
+        Returns:
+            ConfSchema: schema wrapping the JSON description built below.
+        """
+        mode_enum = ['full', 'valid', 'same']
+        method_enum = ['direct', 'fft', 'auto']
+        json = {
+            'title': 'Cusignal Convolution Node',
+            'type': 'object',
+            'description': _CONV_DESC,
+            'properties': {
+                'mode':  {
+                    'type': 'string',
+                    'description': _CONV_MODE_DESC,
+                    'enum': mode_enum,
+                    'default': 'full'
+                },
+                'method': {
+                    'type': 'string',
+                    'description': _CONV_METHOD_DESC,
+                    'enum': method_enum,
+                    'default': 'auto'
+                },
+                'normalize': {
+                    'type': 'boolean',
+                    'description': 'Scale convolutioni by in2 (typically a '
+                    'window) i.e. convolve(in1, in2) / sum(in2). '
+                    'Default False.',
+                    'default': False
+                },
+                'use_cpu': {
+                    'type': 'boolean',
+                    'description': 'Use CPU for computation via '
+                    'scipy::signal.convolve. Default is False and runs on '
+                    'GPU via cusignal.',
+                    'default': False
+                },
+            },
+        }
+        return ConfSchema(json=json)
+
+    def process(self, inputs):
+        """Convolve ``inputs['in1']`` with ``inputs['in2']``.
+
+        Args:
+            inputs: mapping read at the keys ``'in1'`` and ``'in2'``.
+
+        Returns:
+            dict: ``{'convolve': result}`` where ``result`` is the
+            convolution output, divided by ``sum(in2)`` when ``normalize``
+            is set and that sum is not equal to 1.
+        """
+        # Fall back to the same defaults the schema advertises when an
+        # option is absent from the node configuration.
+        mode = self.conf.get('mode', 'full')
+        method = self.conf.get('method', 'auto')
+        normalize = self.conf.get('normalize', False)
+        use_cpu = self.conf.get('use_cpu', False)
+
+        in1 = inputs['in1']
+        in2 = inputs['in2']
+
+        if use_cpu:
+            # NOTE(review): the inputs are handed to scipy unconverted;
+            # presumably callers supply numpy arrays when use_cpu is set,
+            # although the ports also accept cp.ndarray -- confirm.
+            conv = siconv(in1, in2, mode=mode, method=method)
+            if normalize:
+                scale = np.sum(in2)
+        else:
+            conv = cuconv(in1, in2, mode=mode, method=method)
+            if normalize:
+                scale = cp.sum(in2)
+
+        # `scale` is only bound when `normalize` is true, which is the same
+        # condition guarding its use here.
+        if normalize:
+            # Skip the division when the divisor is exactly 1.
+            conv = conv if scale == 1 else conv / scale
+
+        return {'convolve': conv}