[nnpi glow unit test] SLS tests shape sweep with hypothesis testing (pytorch#36833)

Summary:
Pull Request resolved: pytorch#36833

Add a hypothesis-testing sweep to one test in each SLS test suite, covering the different precisions.

Sweep the random seed, embedding shape, batch_size, and weights with hypothesis testing.

Refactor the SLS tests into proper per-precision files, with the precision labeled in each filename.
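
As a minimal sketch of the sweep pattern (parameter ranges copied from the 4-bit diff below; the test body is elided), the hypothesis decorator looks roughly like:

import unittest
from hypothesis import given
from hypothesis import strategies as st

class SparseLengthsSum4BitFakeNNPIFp16Test(unittest.TestCase):
    @given(
        seed=st.integers(0, 65535),          # RNG seed swept per example
        num_rows=st.integers(2, 20),         # rows in the embedding table
        embedding_dim=st.sampled_from([8, 12, 16, 24, 32, 54, 64, 128]),
        batch_size=st.integers(1, 5),        # number of lookup segments
        max_weight=st.integers(0, 100),      # upper bound for per-index weights
    )
    def test_slws_fused_4bit_rowwise(self, seed, num_rows, embedding_dim,
                                     batch_size, max_weight):
        ...  # build data/indices/lengths/weights from these params, run Glow and the C2 reference, compare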

Test Plan:
FB intern: buck test mode/dev //caffe2/caffe2/contrib/fakelowp/test:test_sls_8bit_nnpi_fp32nnpi

Will test OSS after exporting the PR.
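
(Assumption, not part of the original test plan: since each refactored test file ends with unittest.main(), the OSS run should amount to executing the file directly once the fakelowp/Glow shared libraries are built, e.g.

python caffe2/contrib/fakelowp/test/test_sls_4bit_nnpi_fp16.py
)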

Reviewed By: yinghai

Differential Revision: D21098346

fbshipit-source-id: af167118e5289bb7178ffc27aaec3af101dcd828
amylittleyang authored and facebook-github-bot committed Apr 19, 2020
1 parent 8b685a8 commit b0b9e70
Showing 3 changed files with 327 additions and 297 deletions.
119 changes: 55 additions & 64 deletions caffe2/contrib/fakelowp/test/test_sls_4bit_nnpi_fp16.py
@@ -4,34 +4,31 @@
from __future__ import unicode_literals

import numpy as np
import time
import unittest

# Must happen before importing caffe2.python.*
import caffe2.python.fakelowp.init_shared_libs # noqa

from hypothesis import given, settings
from hypothesis import given
from hypothesis import strategies as st
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace, dyndep
from caffe2.python import core, workspace
from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net
from caffe2.python.onnx.tests.test_utils import TestCase
from caffe2.python.fakelowp.test_utils import print_test_debug_info

workspace.GlobalInit(["caffe2", "--glow_global_fp16=1",
"--glow_global_fused_scale_offset_fp16=1",
"--glow_global_force_sls_fp16_accum=1"])


class SparseLengthsSumTest(unittest.TestCase):
class SparseLengthsSum4BitFakeNNPIFp16Test(unittest.TestCase):
@given(seed=st.integers(0, 65535))
def test_slws_fused_4bit_rowwise_all_same(self, seed):
np.random.seed(seed)
workspace.ResetWorkspace()
n = 1
m = 2
data = np.ones((n, m)).astype(np.float32) * 0.2 - 0.1

max_segments = 5
max_segment_length = 100
num_lengths = np.random.randint(1, max_segments + 1)
@@ -43,7 +40,6 @@ def test_slws_fused_4bit_rowwise_all_same(self, seed):
weights = np.random.uniform(low=-0.5, high=0.5,
size=[len(indices)]).astype(np.float32)
weights = np.ones(len(indices)).astype(np.float32)

pred_net = caffe2_pb2.NetDef()
pred_net.name = "pred"
pred_net.external_input.extend(
@@ -56,7 +52,6 @@ def test_slws_fused_4bit_rowwise_all_same(self, seed):
["Y"],
)
)

ref_net = caffe2_pb2.NetDef()
ref_net.name = "ref"
ref_net.external_input.extend(
@@ -69,7 +64,6 @@ def test_slws_fused_4bit_rowwise_all_same(self, seed):
["Y"],
)
)

workspace.FeedBlob("data", data)
workspace.RunOperatorOnce(
core.CreateOperator(
@@ -78,7 +72,6 @@ def test_slws_fused_4bit_rowwise_all_same(self, seed):
['quantized_data']
)
)

print("quantized", workspace.FetchBlob("quantized_data"))
pred_net_onnxified = onnxifi_caffe2_net(
pred_net,
@@ -89,24 +82,18 @@ def test_slws_fused_4bit_rowwise_all_same(self, seed):
adjust_batch=True,
use_onnx=False
)

num_onnxified_ops = sum(
1 if o.type == "Onnxifi" else 0 for o in pred_net_onnxified.op)
np.testing.assert_equal(num_onnxified_ops, 1)

workspace.FeedBlob("indices", indices)
workspace.FeedBlob("lengths", lengths)
workspace.FeedBlob("weights", weights)

workspace.CreateNet(pred_net_onnxified)
workspace.CreateNet(ref_net)

workspace.RunNet(pred_net_onnxified.name)
Y_glow = workspace.FetchBlob('Y')

workspace.RunNet(ref_net.name)
Y_c2 = workspace.FetchBlob('Y')

if not np.allclose(Y_c2, Y_glow):
print_test_debug_info(
"slws_fused_4bit_rowwise",
@@ -121,33 +108,35 @@ def test_slws_fused_4bit_rowwise_all_same(self, seed):
"rowwise_diff": (Y_glow - Y_c2)[:, 0]})
assert(0)

@given(seed=st.integers(0, 65535))
def test_slws_fused_4bit_rowwise(self, seed):
np.random.seed(seed)

@given(
seed=st.integers(0, 65535),
num_rows=st.integers(2, 20),
embedding_dim=st.sampled_from([8, 12, 16, 24, 32, 54, 64, 128]),
batch_size=st.integers(1, 5),
max_weight=st.integers(0, 100),
)
def test_slws_fused_4bit_rowwise(self, seed, num_rows, embedding_dim, batch_size, max_weight):
workspace.ResetWorkspace()
np.random.seed(seed)
data = np.random.rand(num_rows, embedding_dim).astype(np.float32)
lengths = np.random.choice(np.arange(1, num_rows), batch_size).astype(np.int32)

n = 20000
DIM = 6
data = (4 * np.random.random_sample((n, DIM)) + 1).astype(np.float32)
indices = []
for length in lengths:
indices.extend(np.random.choice(np.arange(1, num_rows), length))
indices = np.asarray(indices).astype(np.int64)

max_segments = 200
max_segment_length = 200
num_lengths = np.random.randint(0, max_segments + 1)
# number of segments to run
lengths = np.random.randint(2, max_segment_length + 1, size=num_lengths).astype(
np.int32
)
num_indices = np.sum(lengths)
indices = np.random.randint(low=0, high=n, size=num_indices, dtype=np.int64)
weights = np.random.uniform(low=0.01, high=0.5, size=[len(indices)]).astype(
np.float32
)
weights = np.random.uniform(
low=0,
high=max_weight,
size=[len(indices)]
).astype(np.float32)

pred_net = caffe2_pb2.NetDef()
pred_net.name = "pred"
pred_net.external_input.extend(
["quantized_data", "weights", "indices", "lengths"]
)
["quantized_data", "weights", "indices", "lengths"])
pred_net.external_output.append("Y")
pred_net.op.add().CopyFrom(
core.CreateOperator(
@@ -160,8 +149,7 @@ def test_slws_fused_4bit_rowwise(self, seed):
ref_net = caffe2_pb2.NetDef()
ref_net.name = "ref"
ref_net.external_input.extend(
["quantized_data", "weights", "indices", "lengths"]
)
["quantized_data", "weights", "indices", "lengths"])
ref_net.external_output.append("Y")
ref_net.op.add().CopyFrom(
core.CreateOperator(
@@ -174,49 +162,52 @@ def test_slws_fused_4bit_rowwise(self, seed):
workspace.FeedBlob("data", data)
workspace.RunOperatorOnce(
core.CreateOperator(
"FloatToFused4BitRowwiseQuantized", ["data"], ["quantized_data"]
"FloatToFused4BitRowwiseQuantized",
["data"],
["quantized_data"]
)
)
onnxified_net = onnxifi_caffe2_net(

pred_net_onnxified = onnxifi_caffe2_net(
pred_net,
{},
max_batch_size=max_segments,
max_seq_size=max_segments * max_segment_length,
max_batch_size=batch_size,
max_seq_size=batch_size * np.max(lengths),
debug=True,
adjust_batch=True,
use_onnx=False,
use_onnx=False
)

num_onnxified_ops = sum(
1 if o.type == "Onnxifi" else 0 for o in pred_net_onnxified.op)
np.testing.assert_equal(num_onnxified_ops, 1)

workspace.FeedBlob("indices", indices)
workspace.FeedBlob("lengths", lengths)
workspace.FeedBlob("weights", weights)

workspace.CreateNet(onnxified_net)
workspace.CreateNet(pred_net_onnxified)
workspace.CreateNet(ref_net)

workspace.RunNet(onnxified_net.name)
Y_glow = workspace.FetchBlob("Y")
workspace.RunNet(pred_net_onnxified.name)
Y_glow = workspace.FetchBlob('Y')

workspace.RunNet(ref_net.name)
Y_ref = workspace.FetchBlob("Y")
Y_c2 = workspace.FetchBlob('Y')

diff = np.abs((Y_ref - Y_glow) / (Y_ref + 1e-8))
max_err = np.max(diff, axis=1)
num_offenders = (max_err > 0).sum()
if num_offenders > 0:
if not np.allclose(Y_c2, Y_glow):
print_test_debug_info(
"slws_fused_4bit",
{
"indices": indices,
"data": data.shape,
"lengths": lengths,
"weights": weights,
"Y_glow": Y_glow,
"Y_ref": Y_ref,
"diff": diff,
"rowwise_diff": np.max(diff, axis=1),
},
)
assert 0
"slws_fused_4bit_rowwise",
{"seed": seed,
"indices": indices,
"data": data,
"lengths": lengths,
"weights": weights,
"Y_c2": Y_c2,
"Y_glow": Y_glow,
"diff": Y_glow - Y_c2,
"rowwise_diff": (Y_glow - Y_c2)[:, 0]})
assert(0)

if __name__ == '__main__':
unittest.main()