Skip to content
This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

Commit

Permalink
Add AMP + Update Benchmarking Script (#1405)
Browse files: browse the repository at this point in the history
* Update transformer_xl.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update testing.py

Update attention_cell.py

Update testing.py

Update testing.py

Update testing.py

Update test_models_bert.py

Update run_batch_squad.sh

Update generate_commands.py

Update run_batch_squad.sh

Update run_batch_squad.sh

Update run_batch_squad.sh

Add region

Update generate_commands.py

Update run_squad.template

Try to use clip 1.0

update

Update README.md

Update attention_cell.py

Update benchmark_gluonnlp.py

Update attention_cell.py

Update testing.py

Update run_squad.py

Update attention_cell.py

Update attention_cell.py

Update attention_cell.py

update

Update attention_cell.py

update

Update numbers + log + weight

update

update

Update testing.py

* Update run_squad.py

* Update test_models_mobilebert.py

* Update README.md

* Update test_models_bert.py

* Update testing.py

* Update test_models_mobilebert.py

* Update test_models_roberta.py

* Update gpt2.py

* Update testing.py

* Update bart.py

* Update testing.py

* Update testing.py

* Update README.md

* Update README.md

* Update testing.py

* fix

* update

* Update test_models_roberta.py

* Update test_models_bart.py

* Update test_models_bart.py

* Update test_models_bart.py

* Update testing.py

* only include bart-base

* Update bart.py

* Update bart.py

* update

* Update test_models_transformer.py

* Update test_models_transformer.py

* Update test_models_transformer.py

* Update test_models_transformer.py

* Update run_squad.py

* Update attention_cell.py

* Update README.md

* Update test_models.py

* Update run_squad.py

* Update run_squad.py

* update

* Update run_squad.template

* update

* Update generate_commands.py

* Update optimizer.py

* update

* Update run_squad.py

* update

* Update run_batch_squad.sh

* update

* Update testing.py

* Update test_optimizer.py

* Update benchmark_utils.py

* update

* fix bug in inference

* Update benchmark_gluonnlp.py

* Update run_batch_squad.sh

* Update benchmark_utils.py

* Update run_squad.py

* Update run_squad.py

* Update run_squad.py

* Update run_squad.py

* update
  • Loading branch information
sxjscience authored Nov 6, 2020
1 parent 1726dd2 commit dd45270
Show file tree
Hide file tree
Showing 45 changed files with 723 additions and 299 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ process the text data, and train models.

# Features

- Easy-to-use Text Processing Tools and APIs
- Easy-to-use Text Processing Tools and Modular APIs
- Pretrained Model Zoo
- Write Models with Numpy-like API
- Fast Inference via [Apache TVM (incubating)](https://tvm.apache.org/) (Experimental)
Expand All @@ -28,16 +28,16 @@ First of all, install the latest MXNet. You may use the following commands:

```bash
# Install the version with CUDA 10.0
python3 -m pip install -U --pre "mxnet-cu100>=2.0.0b20200926" -f https://dist.mxnet.io/python
python3 -m pip install -U --pre "mxnet-cu100>=2.0.0b20201101" -f https://dist.mxnet.io/python

# Install the version with CUDA 10.1
python3 -m pip install -U --pre "mxnet-cu101>=2.0.0b20200926" -f https://dist.mxnet.io/python
python3 -m pip install -U --pre "mxnet-cu101>=2.0.0b20201101" -f https://dist.mxnet.io/python

# Install the version with CUDA 10.2
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20200926" -f https://dist.mxnet.io/python
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20201101" -f https://dist.mxnet.io/python

# Install the cpu-only version
python3 -m pip install -U --pre "mxnet>=2.0.0b20200926" -f https://dist.mxnet.io/python
python3 -m pip install -U --pre "mxnet>=2.0.0b20201101" -f https://dist.mxnet.io/python
```


Expand Down
21 changes: 14 additions & 7 deletions scripts/benchmarks/benchmark_gluonnlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,20 +58,22 @@ def get_parser():
help='Whether to use TVM for inference/training')
parser.add_argument('--instance_type', choices=['c4', 'c5', 'g4', 'p3'], default='g4',
help='The instance type that the profiling script will be run on.')
parser.add_argument('--use_fp16', action='store_true')
parser.add_argument('--mode', type=str, default='train',
choices=['train', 'inference'])
return parser


def run_benchmark(workload, model_name, out_file_name, is_train,
use_tvm, instance_type):
use_tvm, instance_type, use_fp16):
if is_train:
benchmark = GluonNLPBackboneBenchmark(
workloads=workload,
model_names=model_name,
profile_inference=False,
profile_train=True,
to_csv=True,
use_fp16=use_fp16,
train_out_csv_file=out_file_name)
benchmark.run()
else:
Expand All @@ -83,6 +85,7 @@ def run_benchmark(workload, model_name, out_file_name, is_train,
use_tvm=use_tvm,
instance_type=instance_type,
to_csv=True,
use_fp16=use_fp16,
inference_out_csv_file=out_file_name)
benchmark.run()
return
Expand All @@ -94,13 +97,15 @@ def run_benchmark(workload, model_name, out_file_name, is_train,
args = parser.parse_args()
if args.compute_layout is None:
args.compute_layout = args.layout
dtype = 'float32' if not args.use_fp16 else 'float16'
for layout, compute_layout in [(args.layout, args.compute_layout)]:
if compute_layout != layout:
profile_models = [ele for ele in MODELS if 'bart' not in ele]
else:
profile_models = [ele for ele in MODELS]
if args.mode == 'inference':
out_dir = 'infer_fp32_{}_{}_tvm{}'.format(layout, compute_layout, int(args.use_tvm))
out_dir = 'infer_{}_{}_{}_tvm{}'.format(dtype, layout, compute_layout,
int(args.use_tvm))
df = pd.DataFrame(columns=['model', 'batch_size', 'sequence_length',
'latency', 'memory'])
os.makedirs(out_dir, exist_ok=True)
Expand All @@ -111,16 +116,17 @@ def run_benchmark(workload, model_name, out_file_name, is_train,
process = Process(
target=run_benchmark,
args=(workload, model_name, out_path, False,
args.use_tvm, args.instance_type))
args.use_tvm, args.instance_type, args.use_fp16))
process.start()
process.join()
new_df = pd.read_csv(out_path)
df = df.append(new_df, ignore_index=True)
df.to_csv('gluonnlp_infer_fp32_{}_{}_tvm{}.csv'.format(layout,
df.to_csv('gluonnlp_infer_{}_{}_{}_tvm{}.csv'.format(dtype,
layout,
compute_layout,
int(args.use_tvm)))
elif args.mode == 'train':
out_dir = 'train_fp32_{}_{}'.format(layout, compute_layout)
out_dir = 'train_{}_{}_{}'.format(dtype, layout, compute_layout)
df = pd.DataFrame(columns=['model', 'batch_size', 'sequence_length',
'latency', 'memory'])
os.makedirs(out_dir, exist_ok=True)
Expand All @@ -130,11 +136,12 @@ def run_benchmark(workload, model_name, out_file_name, is_train,
workload[1]))
process = Process(
target=run_benchmark,
args=(workload, model_name, out_path, True))
args=(workload, model_name, out_path, True, False,
args.instance_type, args.use_fp16))
process.start()
process.join()
new_df = pd.read_csv(out_path)
df = df.append(new_df, ignore_index=True)
df.to_csv('gluonnlp_train_fp32_{}_{}.csv'.format(layout, compute_layout))
df.to_csv('gluonnlp_train_{}_{}_{}.csv'.format(dtype, layout, compute_layout))
else:
raise NotImplementedError
14 changes: 14 additions & 0 deletions scripts/benchmarks/benchmark_gluonnlp_fp16.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark the GluonNLP backbones in fp16 for every (layout, compute_layout)
# pair. For each pair the training benchmark runs first, then inference.
for layouts in "NT NT" "NT TN" "TN TN"
do
    # Split the "<layout> <compute_layout>" pair on its single space.
    layout=${layouts% *}
    compute_layout=${layouts#* }
    for mode in train inference
    do
        python3 benchmark_gluonnlp.py --layout "$layout" --compute_layout "$compute_layout" --mode "$mode" --use_fp16
    done
done
18 changes: 12 additions & 6 deletions scripts/benchmarks/benchmark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,6 @@ def __init__(self, workloads, model_names, use_fp16=False,
self._inference_out_csv_file = inference_out_csv_file
self._train_out_csv_file = train_out_csv_file
self._env_info_file = env_info_file
assert use_fp16 is False, 'Currently fp16 benchmark has not been supported yet.'

@property
def model_names(self):
Expand All @@ -760,22 +759,26 @@ def workloads(self):

def _inference_speed_memory(self, model_name: str, batch_size: int, sequence_length: int)\
-> Tuple[float, Memory]:
if self._use_fp16:
dtype = 'float16'
else:
dtype = 'float32'
if self._use_gpu:
ctx = mxnet.gpu()
else:
ctx = mxnet.cpu()
model_cls, cfg, tokenizer, backbone_param_path, _ = get_backbone(model_name)
# TODO Support fp16 profiling
cfg.defrost()
cfg.MODEL.layout = self._layout
if model_cls.__name__ not in ['BartModel']:
cfg.MODEL.compute_layout = self._compute_layout
cfg.freeze()
if model_cls.__name__ in ['BartModel']:
model = model_cls.from_cfg(cfg, extract_feature=True)
model = model_cls.from_cfg(cfg, extract_feature=True, dtype=dtype)
else:
model = model_cls.from_cfg(cfg)
model.load_parameters(backbone_param_path, ctx=ctx)
model = model_cls.from_cfg(cfg, dtype=dtype)
model.load_parameters(backbone_param_path, ctx=ctx, cast_dtype=True)
model.cast(dtype)
model.hybridize()
vocab_size = cfg.MODEL.vocab_size
if self._layout == 'NT':
Expand Down Expand Up @@ -860,12 +863,15 @@ def run_tvm_forward():

def _train_speed_memory(self, model_name: str, batch_size: int, sequence_length: int)\
-> Tuple[float, Memory]:
if self._use_fp16:
from mxnet import amp
amp.init()

if self._use_gpu:
ctx = mxnet.gpu()
else:
ctx = mxnet.cpu()
model_cls, cfg, tokenizer, backbone_param_path, _ = get_backbone(model_name)
# TODO Support fp16 profiling
cfg.defrost()
cfg.MODEL.layout = self._layout
if model_cls.__name__ not in ['BartModel']:
Expand Down
1 change: 1 addition & 0 deletions scripts/machine_translation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ python3 train_transformer.py \
--save_dir transformer_base_wmt2014_en_de_${SUBWORD_ALGO} \
--cfg transformer_base \
--lr 0.002 \
--num_accumulated 32 \
--sampler BoundedBudgetSampler \
--max_num_tokens 2700 \
--epochs 30 \
Expand Down
6 changes: 4 additions & 2 deletions scripts/machine_translation/train_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,8 +441,10 @@ def train(args):
for sample_data, ctx in zip(sample_data_l, ctx_l):
if sample_data is None:
continue
src_token_ids, tgt_token_ids, src_valid_length, tgt_valid_length, sample_ids = sample_data
src_wc, tgt_wc, bs = src_valid_length.sum(), tgt_valid_length.sum(), src_token_ids.shape[0]
src_token_ids, tgt_token_ids, src_valid_length,\
tgt_valid_length, sample_ids = sample_data
src_wc, tgt_wc, bs = src_valid_length.sum(),\
tgt_valid_length.sum(), src_token_ids.shape[0]
loss_denom += tgt_wc - bs
log_loss_denom += tgt_wc - bs
log_wc += src_wc + tgt_wc
Expand Down
Loading

0 comments on commit dd45270

Please sign in to comment.