update

update Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Create requirements.txt Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update requirements.txt update Update README.md Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py Update benchmark_hf.py fix fix Update test_models_bart.py Update test_models_bart.py Update bart.py update Update __init__.py Update electra.py update update Update convert_bert_from_tf_hub.sh update Update unittests.yml fix conversion update fix bert conversion update fix fix Update __init__.py fix bug fix css Update benchmark_utils.py Update benchmark_utils.py update update Update misc.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py no multiprocessing Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py fix bug Update benchmark_utils.py Update benchmark_utils.py try to use mxnet profiler Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py fix update Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py fix Update benchmark_utils.py Update bart.py Update bart.py fix fix Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_gluonnlp.py Update benchmark_gluonnlp.py Update benchmark_gluonnlp.py Update benchmark_utils.py Update benchmark_utils.py Update benchmark_utils.py Update README.md
dmlc · Aug 10, 2020 · 2fd7e3b · 2fd7e3b
1 parent 9e268c0
commit 2fd7e3b
Show file tree

Hide file tree

Showing 32 changed files with 1,638 additions and 183 deletions.
diff --git a/.github/workflows/conversion_tool_test.yml b/.github/workflows/conversion_tool_test.yml
@@ -0,0 +1,42 @@
+name: conversion toolkits test
+# CI workflow that runs the model conversion scripts under scripts/conversion_toolkits.
+on: [push]  # triggered by push events
+
+defaults:
+  run:
+    shell: bash  # every `run` step below executes under bash
+
+jobs:
+  unittest:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false  # do not cancel the remaining matrix jobs when one fails
+      matrix:
+        os: [ubuntu-latest]  # single-entry matrix; the Linux-only step below keys off this value
+        python-version: ['3.7']
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      # Install OS specific dependencies
+      - name: Install Linux dependencies
+        if: matrix.os == 'ubuntu-latest'
+        # TODO https://github.com/apache/incubator-mxnet/issues/18293
+        run: sudo apt-get install libopenblas-dev
+
+      - name: Setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+      - name: Install Other Dependencies  # pip tooling, a pre-release MXNet build, then this repo in editable mode with extras
+        run: |
+          python -m pip install --user --upgrade pip
+          python -m pip install --user setuptools pytest pytest-cov contextvars
+          python -m pip install --upgrade cython
+          python -m pip install --pre --user "mxnet>=2.0.0b20200802" -f https://dist.mxnet.io/python
+          python -m pip install --user -e .[extras]
+      - name: Test conversion tools  # runs convert_all.sh from scripts/conversion_toolkits
+        run: |
+          cd scripts/conversion_toolkits
+          bash convert_all.sh
diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
@@ -35,7 +35,7 @@ jobs:
           python -m pip install --user --upgrade pip
           python -m pip install --user setuptools pytest pytest-cov contextvars
           python -m pip install --upgrade cython
-          python -m pip install --pre --user "mxnet>=2.0.0b20200716" -f https://dist.mxnet.io/python
+          python -m pip install --pre --user "mxnet>=2.0.0b20200802" -f https://dist.mxnet.io/python
           python -m pip install --user -e .[extras]
       - name: Test project
         run: |

diff --git a/README.md b/README.md
@@ -20,35 +20,32 @@ First of all, install the latest MXNet. You may use the following commands:
 
 ```bash
 # Install the version with CUDA 10.0
-pip install -U --pre "mxnet-cu100>=2.0.0b20200802" -f https://dist.mxnet.io/python
+python3 -m pip install -U --pre "mxnet-cu100>=2.0.0b20200802" -f https://dist.mxnet.io/python
 
 # Install the version with CUDA 10.1
-pip install -U --pre "mxnet-cu101>=2.0.0b20200802" -f https://dist.mxnet.io/python
+python3 -m pip install -U --pre "mxnet-cu101>=2.0.0b20200802" -f https://dist.mxnet.io/python
 
 # Install the version with CUDA 10.2
-pip install -U --pre "mxnet-cu102>=2.0.0b20200802" -f https://dist.mxnet.io/python
+python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20200802" -f https://dist.mxnet.io/python
 
 # Install the cpu-only version
-pip install -U --pre "mxnet>=2.0.0b20200802" -f https://dist.mxnet.io/python
+python3 -m pip install -U --pre "mxnet>=2.0.0b20200802" -f https://dist.mxnet.io/python
 ```
 
 
-To install, use
+To install GluonNLP, use
 
 ```bash
-pip install -U -e .
+python3 -m pip install -U -e .
 
 # Also, you may install all the extra requirements via
-pip install -U -e .[extras]
-
-# In case you are using zsh, try to use the following command for installing
-pip install -U -e ."[extras]"
+python3 -m pip install -U -e ."[extras]"
 ```
 
 If you do not have permission to install system-wide, you can install to the user folder instead:
 
 ```bash
-pip install -U -e . --user
+python3 -m pip install -U -e . --user
 ```
 
 For Windows users, we recommend using the [Windows Subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/about).
@@ -68,8 +65,8 @@ nlp_data help
 nlp_preprocess help
 
 # Also, you can use `python -m` to access the toolkits
-python -m gluonnlp.cli.data help
-python -m gluonnlp.cli.preprocess help
+python3 -m gluonnlp.cli.data help
+python3 -m gluonnlp.cli.preprocess help
 
 ```
 

diff --git a/docs/_static/custom.css b/docs/_static/custom.css
@@ -20,9 +20,11 @@
 }
 
 @media (max-width: 650px) {
-.install .option, .install .title {
-    width: 90%;
-}
-.install .title {
-    margin-top: 1em;
+    .install .option, .install .title {
+        width: 90%;
+    }
+
+    .install .title {
+        margin-top: 1em;
+    }
 }
diff --git a/scripts/benchmarks/README.md b/scripts/benchmarks/README.md
@@ -0,0 +1,21 @@
+# Benchmarking the Performance of NLP Backbones
+
+We benchmark the training and inference latency, as well as the memory usage, of the NLP backbones.
+For comparison, we also provide the numbers of the corresponding backbones in HuggingFace.
+
+## Backbones in HuggingFace
+
+We use the [HuggingFace benchmark](https://github.com/huggingface/transformers/tree/master/examples/benchmarking)
+to benchmark the training and inference speed of common workloads in NLP.
+
+```bash
+python3 -m pip install -U -r requirements.txt --user
+python3 benchmark_hf.py
+```
+
+## GluonNLP Backbones based on MXNet
+
+```bash
+python3 -m pip install -U -r requirements.txt --user
+python3 benchmark_gluonnlp.py
+```
diff --git a/scripts/benchmarks/benchmark_gluonnlp.py b/scripts/benchmarks/benchmark_gluonnlp.py
@@ -0,0 +1,68 @@
+from benchmark_utils import GluonNLPBackboneBenchmark
+
+# Backbone names to profile; entries that are commented out are not run.
+MODELS = [
+    # 'google_en_uncased_bert_base',
+    # 'google_en_uncased_bert_large',
+    # 'google_albert_base_v2',
+    # 'google_albert_large_v2',
+    # 'google_albert_xlarge_v2',
+    # 'google_albert_xxlarge_v2',
+    'google_electra_small',
+    # 'google_electra_base',
+    # 'google_electra_large',
+    # 'google_uncased_mobilebert',
+    'fairseq_bart_base',
+    # 'fairseq_bart_large'
+]
+
+# (batch_size, seq_length)
+train_workloads =\
+    [(4, 128),
+     (8, 128),
+     (16, 128),
+     (32, 128),
+     (1, 512),
+     (2, 512),
+     (4, 512),
+     (8, 512)]
+
+# Workloads passed to the inference benchmark; presumably the same (batch_size, seq_length) format as above.
+inference_workloads = [
+    (1, 128),
+    (1, 384),
+    (1, 512),
+    (8, 32),
+    (8, 128),
+    (8, 512),
+    (32, 512),
+    (256, 128),
+    (400, 100),
+]
+# Script entry point: one inference run and one training run per (layout, compute_layout) pair.
+if __name__ == '__main__':
+    for layout, compute_layout in [('NT', 'NT'),
+                                   ('NT', 'TN'),
+                                   ('TN', 'TN')]:
+        # NOTE(review): layout/compute_layout only appear in the CSV file names below; they are not passed to GluonNLPBackboneBenchmark here - confirm this is intended.
+        if compute_layout != layout:
+            profile_models = [ele for ele in MODELS if 'bart' not in ele]  # models with 'bart' in the name are skipped when the two layouts differ
+        else:
+            profile_models = [ele for ele in MODELS]
+        inference_benchmark = GluonNLPBackboneBenchmark(
+            workloads=inference_workloads,
+            model_names=profile_models,
+            profile_inference=True,
+            profile_train=False,
+            to_csv=True,
+            inference_out_csv_file='gluonnlp_infer_fp32_{}_{}.csv'.format(layout, compute_layout))
+        inference_benchmark.run()
+        # Same model list, now with the training workloads and a separate output CSV.
+        train_benchmark = GluonNLPBackboneBenchmark(
+            workloads=train_workloads,
+            model_names=profile_models,
+            profile_inference=False,
+            profile_train=True,
+            to_csv=True,
+            train_out_csv_file='gluonnlp_train_fp32_{}_{}.csv'.format(layout, compute_layout))
+        train_benchmark.run()
diff --git a/scripts/benchmarks/benchmark_hf.py b/scripts/benchmarks/benchmark_hf.py
@@ -0,0 +1,146 @@
+import argparse
+import pandas as pd
+import math
+import os
+from multiprocessing import Process
+from transformers import HfArgumentParser, PyTorchBenchmark, PyTorchBenchmarkArguments
+
+# Model identifiers passed to the benchmark via `--models`.
+HF_MODELS = [
+    'bert-base-uncased',
+    'bert-large-uncased',
+    'albert-base-v2',
+    'albert-large-v2',
+    'albert-xlarge-v2',
+    'albert-xxlarge-v2',
+    'google/electra-small-discriminator',
+    'google/electra-base-discriminator',
+    'google/electra-large-discriminator',
+    'google/mobilebert-uncased',
+    'facebook/bart-base',
+    'facebook/bart-large'
+]
+
+# (batch_size, seq_length)
+train_workloads =\
+    [(4, 128),
+     (8, 128),
+     (16, 128),
+     (32, 128),
+     (1, 512),
+     (2, 512),
+     (4, 512),
+     (8, 512)]
+
+# (batch_size, seq_length)
+inference_workloads = [
+    (1, 128),
+    (1, 384),
+    (1, 512),
+    (8, 32),
+    (8, 128),
+    (8, 512),
+    (32, 512),
+    (256, 128),
+    (400, 100),
+]
+
+# Script entry point: run every (model, workload) pair and collect latency/memory into one CSV per configuration.
+if __name__ == '__main__':
+    # Profile PyTorch
+    parser = HfArgumentParser(PyTorchBenchmarkArguments)
+    # Benchmark Training
+    for use_fp16 in [False, True]:  # a fresh result table is built for each precision setting
+        df = pd.DataFrame(columns=['model', 'batch_size', 'sequence_length',
+                                   'latency', 'memory'])
+        for model in HF_MODELS:
+            for batch_size, seq_length in train_workloads:
+                prefix = '{}_{}_{}'.format(model, batch_size, seq_length).replace('/', '_')  # '/' -> '_' so the prefix can be used in file names
+                args = ['--models', model,
+                        '--batch_sizes', '{}'.format(batch_size),
+                        '--sequence_lengths', '{}'.format(seq_length),
+                        '--train_time_csv_file', '{}.train_time.csv'.format(prefix),
+                        '--train_memory_csv_file', '{}.train_memory.csv'.format(prefix),
+                        '--no_env_print',
+                        '--repeat', '3',
+                        '--save_to_csv', '--training', '--no_inference']
+                if use_fp16:
+                    args.append('--fp16')
+                benchmark_args = parser.parse_args_into_dataclasses(args)[0]
+                benchmark = PyTorchBenchmark(args=benchmark_args)
+                p = Process(target=benchmark.run)  # each benchmark runs in its own process
+                p.start()
+                p.join()  # wait for the run to finish before reading its CSV output
+                try:
+                    train_time_df = pd.read_csv('{}.train_time.csv'.format(prefix))
+                    train_memory_df = pd.read_csv('{}.train_memory.csv'.format(prefix))
+                    latency = train_time_df['result'][0]
+                    memory = train_memory_df['result'][0]
+                    os.remove('{}.train_time.csv'.format(prefix))  # per-run CSVs are temporary; only the aggregated table is kept
+                    os.remove('{}.train_memory.csv'.format(prefix))
+                except Exception:  # any failure while reading/removing the per-run CSVs is recorded as NaN
+                    latency = math.nan
+                    memory = math.nan
+                new_df = pd.DataFrame({'model': [model],
+                                       'batch_size': [batch_size],
+                                       'sequence_length': [seq_length],
+                                       'latency': [latency],
+                                       'memory': [memory]})
+                df = df.append(new_df, ignore_index=True)  # NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
+                if use_fp16:  # the aggregated CSV is rewritten after every workload, so partial results survive an interruption
+                    df.to_csv('pytorch_train_fp16.csv')
+                else:
+                    df.to_csv('pytorch_train_fp32.csv')
+
+    # Benchmark Inference
+    for torch_script in [False, True]:
+        for use_fp16 in [False, True]:
+            if torch_script and use_fp16:
+                # Cannot support both torch_script and use_fp16.
+                continue
+            df = pd.DataFrame(columns=['model', 'batch_size', 'sequence_length',
+                                       'latency', 'memory'])
+            for model in HF_MODELS:
+                for batch_size, seq_length in inference_workloads:
+                    prefix = '{}_{}_{}'.format(model, batch_size, seq_length).replace('/', '_')  # '/' -> '_' so the prefix can be used in file names
+                    args = ['--models', model,
+                            '--batch_sizes', '{}'.format(batch_size),
+                            '--sequence_lengths', '{}'.format(seq_length),
+                            '--inference_time_csv_file', '{}.inference_time.csv'.format(prefix),
+                            '--inference_memory_csv_file', '{}.inference_memory.csv'.format(prefix),
+                            '--no_env_print',
+                            '--repeat', '3',
+                            '--save_to_csv']
+                    if use_fp16:
+                        args.append('--fp16')
+                    if torch_script:
+                        args.append('--torchscript')
+                    benchmark_args = parser.parse_args_into_dataclasses(args)[0]
+                    benchmark = PyTorchBenchmark(args=benchmark_args)
+                    p = Process(target=benchmark.run)  # each benchmark runs in its own process
+                    p.start()
+                    p.join()  # wait for the run to finish before reading its CSV output
+                    try:
+                        inference_time_df = pd.read_csv('{}.inference_time.csv'.format(prefix))
+                        inference_memory_df = pd.read_csv('{}.inference_memory.csv'.format(prefix))
+                        latency = inference_time_df['result'][0]
+                        memory = inference_memory_df['result'][0]
+                        os.remove('{}.inference_time.csv'.format(prefix))  # per-run CSVs are temporary; only the aggregated table is kept
+                        os.remove('{}.inference_memory.csv'.format(prefix))
+                    except Exception:  # any failure while reading/removing the per-run CSVs is recorded as NaN
+                        latency = math.nan
+                        memory = math.nan
+                    new_df = pd.DataFrame({'model': [model],
+                                           'batch_size': [batch_size],
+                                           'sequence_length': [seq_length],
+                                           'latency': [latency],
+                                           'memory': [memory]})
+                    df = df.append(new_df, ignore_index=True)  # NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
+                    if use_fp16 and torch_script:  # NOTE(review): unreachable - this combination is skipped by the `continue` above
+                        df.to_csv('pytorch_infer_fp16_ts.csv')
+                    elif use_fp16 and not torch_script:
+                        df.to_csv('pytorch_infer_fp16.csv')
+                    elif not use_fp16 and torch_script:
+                        df.to_csv('pytorch_infer_fp32_ts.csv')
+                    else:
+                        df.to_csv('pytorch_infer_fp32.csv')