This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

Commit: update
Merge conversion toolkits

update unittests by fixing the version

update datasets

add scripts

Delete __init__.py

add src

update

Update setup.py

Update setup.py

update all tests

revise test cases

Update unittests.yml

Update initializer.py

Create preprocessing.py

Update __init__.py

Update attention_cell.py

Update prepare_wmt.py

move ubuntu + windows to TODO
sxjscience committed Jun 10, 2020
1 parent ba3c131 commit 7755555
Showing 99 changed files with 8,933 additions and 1,393 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/unittests.yml
@@ -12,7 +12,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        # TODO Add ubuntu test by "ubuntu-latest", Add windows test by using "windows-latest"
+        os: [macos-latest]
         python-version: [ '3.6', '3.7', '3.8' ]
     steps:
       - name: Checkout repository
@@ -34,7 +35,7 @@ jobs:
         python -m pip install --user --upgrade pip
         python -m pip install --user setuptools pytest pytest-cov
         python -m pip install --upgrade cython
-        python -m pip install --pre --user mxnet -f https://dist.mxnet.io/python/cpu
+        python -m pip install --pre --user mxnet==2.0.0b20200604 -f https://dist.mxnet.io/python
         python -m pip install --user -e .[extras]
       - name: Test project
         run: |
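
For readers who want to reproduce the pinned CI environment locally, the sketch below simply replays the install steps from the hunk above; it is not part of the diff, and the final test command is an assumption (the workflow's actual `run:` block is truncated here).

```bash
# Hypothetical local replay of the CI install steps above (macOS, Python 3.6-3.8 assumed).
python -m pip install --user --upgrade pip
python -m pip install --user setuptools pytest pytest-cov
python -m pip install --upgrade cython
# Pinned nightly build, matching the change in this workflow.
python -m pip install --pre --user mxnet==2.0.0b20200604 -f https://dist.mxnet.io/python
python -m pip install --user -e .[extras]
# Assumption: the truncated test step amounts to running pytest over the tests/ folder.
python -m pytest tests/
```
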
10 changes: 0 additions & 10 deletions .gitmodules

This file was deleted.

2 changes: 1 addition & 1 deletion .pytype.cfg
@@ -5,4 +5,4 @@ inputs =
     src/gluonnlp
 
 # Python version (major.minor) of the target code.
-python_version = 3.5
+python_version = 3.6
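
The bumped target version can be exercised locally; the invocation below is an assumption (pointing pytype at this config file via `--config`), not something shown in the diff.

```bash
# Assumed invocation: run pytype against the updated config (python_version = 3.6).
python -m pip install --user pytype
pytype --config=.pytype.cfg
```
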
14 changes: 5 additions & 9 deletions README.md
@@ -21,14 +21,10 @@ First of all, install the latest MXNet. You may use the following commands:
 ```bash
 
 # Install the version with CUDA 10.1
-pip install -U --pre mxnet-cu101 -f https://dist.mxnet.io/python
-
-# In case you do not have the permission, try the following
-pip install -U --pre mxnet-cu101 -f https://dist.mxnet.io/python --user
-
+pip install -U --pre mxnet-cu101==2.0.0b20200604 -f https://dist.mxnet.io/python
 
 # Install the cpu-only version
-pip install -U --pre mxnet -f https://dist.mxnet.io/python/cpu
+pip install -U --pre mxnet==2.0.0b20200604 -f https://dist.mxnet.io/python
 ```
 
 
@@ -67,10 +63,10 @@ nlp_data help
 nlp_preprocess help
 
 # Also, you can use `python -m` to access the toolkits
-python -m gluonnlp.cli.data help
-python -m gluonnlp.cli.preprocess help
+python -m numpy_nlp.cli.data help
+python -m numpy_nlp.cli.preprocess help
 
 ```
 
 # Run Unittests
-You may go to [tests](tests) to see all the unittests.
+You may go to [tests](tests) to see how to run the unittests.
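
After installing, the pinned MXNet build and the renamed CLI module can be sanity-checked. The snippet below is illustrative only: the CLI commands mirror what the updated README invokes, and the version probe is a standard `mxnet.__version__` check rather than anything taken from the diff.

```bash
# Assumed post-install sanity checks; the CLI commands mirror the updated README.
python -c "import mxnet; print(mxnet.__version__)"   # expect the pinned 2.0.0b20200604 build
python -m numpy_nlp.cli.data help
nlp_data help
```
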
30 changes: 26 additions & 4 deletions scripts/conversion_toolkits/README.md
@@ -36,9 +36,30 @@ do
 python convert_tf_hub_model.py --tf_hub_model_path albert_${model}_v2 --model_type albert --test
 done
 ```
 
 ## RoBERTa
 
-TBA
+```bash
+for model in base large
+do
+mkdir roberta_${model}
+wget "https://dl.fbaipublicfiles.com/fairseq/models/roberta.${model}.tar.gz"
+tar zxf roberta.${model}.tar.gz --directory roberta_${model}
+python convert_fairseq_roberta.py --fairseq_model_dir roberta_${model}/roberta.${model} --model_size ${model} --test
+done
+```
+
+## XLM-R
+
+```bash
+for model in base large
+do
+mkdir xlmr_${model}
+wget "https://dl.fbaipublicfiles.com/fairseq/models/xlmr.${model}.tar.gz"
+tar zxf xlmr.${model}.tar.gz --directory xlmr_${model}
+python convert_fairseq_xlmr.py --fairseq_model_dir xlmr_${model}/xlmr.${model} --model_size ${model} --test
+done
+```
 
 ## ELECTRA
 The TF Hub is not available for ELECTRA model currently.
@@ -57,6 +78,7 @@ pip install tensorflow==1.13.2
 bash convert_electra.sh
 ```
 
-## T5
-
-TBA
+## Mobile Bert
+```bash
+bash convert_mobilebert.sh
+```
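
Unrolling the new RoBERTa loop for a single size makes the steps easier to follow; the commands below are just the loop body above with `${model}` fixed to `base`, and the comment about `--test` is an assumption since the flag's behavior is not shown in the diff.

```bash
# The RoBERTa conversion loop above, unrolled for model size "base".
mkdir roberta_base
wget "https://dl.fbaipublicfiles.com/fairseq/models/roberta.base.tar.gz"
tar zxf roberta.base.tar.gz --directory roberta_base
# --test presumably verifies the converted model; flag usage copied from the loop above.
python convert_fairseq_roberta.py --fairseq_model_dir roberta_base/roberta.base --model_size base --test
```
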
10 changes: 5 additions & 5 deletions scripts/conversion_toolkits/convert_bert_from_tf_hub.sh
@@ -20,7 +20,7 @@ do
 done
 
 # Conversion for Chinese Models
-url="https://tfhub.dev/google/bert_zh_L-12_H-768_A-12/2?tf-hub-format=compressed"
+url="https://tfhub.dev/tensorflow/bert_zh_L-12_H-768_A-12/2?tf-hub-format=compressed"
 hub_directory="google_zh_bert_base"
 mkdir ${hub_directory}
 wget ${url} -O "${hub_directory}.tar.gz"
@@ -29,7 +29,7 @@ cp bert_base_config.json ${hub_directory}/assets/
 python convert_tf_hub_model.py --tf_hub_model_path ${hub_directory} --model_type bert --test
 
 # Conversion for Multi-lingual Models
-url="https://tfhub.dev/google/bert_multi_cased_L-12_H-768_A-12/2?tf-hub-format=compressed"
+url="https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/2?tf-hub-format=compressed"
 hub_directory="google_multi_cased_bert_base"
 mkdir ${hub_directory}
 wget ${url} -O "${hub_directory}.tar.gz"
@@ -42,9 +42,9 @@ for case in cased uncased
 do
 hub_directory="google_en_${case}_bert_wwm_large"
 mkdir ${hub_directory}
-url="https://tfhub.dev/google/bert_en_wwm_${case}_L-24_H-1024_A-16/2?tf-hub-format=compressed"
-wget ${url} -O ${hub_directory}
+url="https://tfhub.dev/tensorflow/bert_en_wwm_${case}_L-24_H-1024_A-16/2?tf-hub-format=compressed"
+wget ${url} -O "${hub_directory}.tar.gz"
 tar -xvf ${hub_directory}.tar.gz --directory ${hub_directory}
-cp bert_${model}_config.json ${hub_directory}/assets/
+cp bert_large_config.json ${hub_directory}/assets/
 python convert_tf_hub_model.py --tf_hub_model_path ${hub_directory} --model_type bert --test
 done
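
The last hunk fixes two problems in the whole-word-masking branch: the archive is now saved as `${hub_directory}.tar.gz` before extraction, and the config copy uses `bert_large_config.json` instead of referencing `${model}`, which is not defined in a loop over `${case}`. Unrolled for `case=cased`, the corrected sequence taken from the fixed lines reads:

```bash
# Corrected whole-word-masking conversion, unrolled for case=cased (mirrors the fixed loop body).
hub_directory="google_en_cased_bert_wwm_large"
mkdir ${hub_directory}
url="https://tfhub.dev/tensorflow/bert_en_wwm_cased_L-24_H-1024_A-16/2?tf-hub-format=compressed"
wget ${url} -O "${hub_directory}.tar.gz"
tar -xvf ${hub_directory}.tar.gz --directory ${hub_directory}
cp bert_large_config.json ${hub_directory}/assets/
python convert_tf_hub_model.py --tf_hub_model_path ${hub_directory} --model_type bert --test
```
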