2024-03-13 nightly release (3507412)

pytorchbot committed Mar 13, 2024
1 parent 862f755 · commit 7d8e22d

Showing 427 changed files with 18,621 additions and 12,868 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-f5b99976adcbb01fd71bd0a39ea15bdac6c9e48a
+6ca9ae4f8693639c395544327f7e362441a58c79
8 changes: 5 additions & 3 deletions .ci/docker/common/install_conda.sh
@@ -44,13 +44,15 @@ install_pip_dependencies() {
}

fix_conda_ubuntu_libstdcxx() {
  cat /etc/issue
  # WARNING: This is a HACK from PyTorch core to be able to build PyTorch on 22.04.
  # The issue still exists with the latest conda 23.10.0-1 at the time of writing
  # (2023/11/16).
  # Specifically, Ubuntu 20.04+ all come with a libstdc++ newer than 3.30+, but
  # anaconda is stuck with 3.29. So, remove the 3.29 libstdc++.so.6 as installed by
  # https://anaconda.org/anaconda/libstdcxx-ng/files?version=11.2.0
  #
  # PyTorch sev: https://github.com/pytorch/pytorch/issues/105248
  # Ref: https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_conda.sh
-  if grep -e "[12][82].04.[623]" /etc/issue >/dev/null; then
+  if grep -e "2[02].04." /etc/issue >/dev/null; then
    rm "/opt/conda/envs/py_${PYTHON_VERSION}/lib/libstdc++.so.6"
  fi
}
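The new grep pattern trades the old hard-coded list of point releases for a match on any Ubuntu 20.04/22.04 banner. A quick illustrative sketch of the difference, not part of the commit (Python's `re` agrees with grep's BRE for these patterns):

```python
# Sketch only: compare the old and new /etc/issue patterns ('.' matches any char).
import re

old, new = r"[12][82].04.[623]", r"2[02].04."
for banner in ("Ubuntu 20.04.6 LTS", "Ubuntu 22.04.4 LTS", "Ubuntu 18.04.6 LTS"):
    print(banner, bool(re.search(old, banner)), bool(re.search(new, banner)))
# Ubuntu 20.04.6 LTS -> old: False  new: True   (20.04 was missed before)
# Ubuntu 22.04.4 LTS -> old: False  new: True   (every 22.04 point release now matches)
# Ubuntu 18.04.6 LTS -> old: True   new: False  (18.04 no longer triggers the hack)
```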
2 changes: 2 additions & 0 deletions .github/pytorch-probot.yml
@@ -2,3 +2,5 @@
ciflow_push_tags:
- ciflow/nightly
- ciflow/trunk
+- ciflow/binaries
+- ciflow/binaries/all
57 changes: 57 additions & 0 deletions .github/workflows/build-wheels-linux.yml
@@ -0,0 +1,57 @@
# From https://github.com/pytorch/test-infra/wiki/Using-Nova-Reusable-Build-Workflows
name: Build Linux Wheels

on:
  pull_request:
    paths:
      - build/packaging/**
      - .github/workflows/build-wheels-linux.yml
  push:
    branches:
      - nightly
      - release/*
    tags:
      # NOTE: Binary build pipelines should only get triggered on release candidate builds
      # Release candidate tags look like: v1.11.0-rc1
      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
      - ciflow/binaries/*
  workflow_dispatch:

jobs:
  generate-matrix:
    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
    with:
      package-type: wheel
      os: linux
      test-infra-repository: pytorch/test-infra
      test-infra-ref: main
      with-cuda: disabled
      with-rocm: disabled

  build:
    needs: generate-matrix
    permissions:
      id-token: write
      contents: read
    strategy:
      fail-fast: false
      matrix:
        include:
          - repository: pytorch/executorch
            pre-script: build/packaging/pre_build_script.sh
            post-script: build/packaging/post_build_script.sh
            smoke-test-script: build/packaging/smoke_test.py
            package-name: executorch
    name: ${{ matrix.repository }}
    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
    with:
      repository: ${{ matrix.repository }}
      ref: ""
      test-infra-repository: pytorch/test-infra
      test-infra-ref: main
      build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
      pre-script: ${{ matrix.pre-script }}
      post-script: ${{ matrix.post-script }}
      package-name: ${{ matrix.package-name }}
      smoke-test-script: ${{ matrix.smoke-test-script }}
      trigger-event: ${{ github.event_name }}
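The tag filters above use GitHub Actions glob syntax, where `+` means one or more of the preceding character and `.` is literal. A hypothetical Python translation (not part of the workflow) of which pushed tags would start a binary build:

```python
# Sketch: rough Python equivalent of the Actions tag filters
# "v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+" and "ciflow/binaries/*".
import re

rc_tag = re.compile(r"^v\d+\.\d+\.\d+-rc\d+$")

def triggers_binary_build(tag: str) -> bool:
    return bool(rc_tag.match(tag)) or tag.startswith("ciflow/binaries/")

for tag in ("v1.11.0-rc1", "v0.1.0-rc2", "v1.11.0", "ciflow/binaries/12345"):
    print(tag, triggers_binary_build(tag))  # True, True, False, True
```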
58 changes: 58 additions & 0 deletions .github/workflows/build-wheels-m1.yml
@@ -0,0 +1,58 @@
# From https://github.com/pytorch/test-infra/wiki/Using-Nova-Reusable-Build-Workflows
name: Build M1 Wheels

on:
  pull_request:
    paths:
      - build/packaging/**
      - .github/workflows/build-wheels-m1.yml
  push:
    branches:
      - nightly
      - release/*
    tags:
      # NOTE: Binary build pipelines should only get triggered on release candidate builds
      # Release candidate tags look like: v1.11.0-rc1
      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
      - ciflow/binaries/*
  workflow_dispatch:

jobs:
  generate-matrix:
    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
    with:
      package-type: wheel
      os: macos-arm64
      test-infra-repository: pytorch/test-infra
      test-infra-ref: main
      with-cuda: disabled
      with-rocm: disabled

  build:
    needs: generate-matrix
    permissions:
      id-token: write
      contents: read
    strategy:
      fail-fast: false
      matrix:
        include:
          - repository: pytorch/executorch
            pre-script: build/packaging/pre_build_script.sh
            post-script: build/packaging/post_build_script.sh
            smoke-test-script: build/packaging/smoke_test.py
            package-name: executorch
    name: ${{ matrix.repository }}
    uses: pytorch/test-infra/.github/workflows/build_wheels_macos.yml@main
    with:
      repository: ${{ matrix.repository }}
      ref: ""
      test-infra-repository: pytorch/test-infra
      test-infra-ref: main
      build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
      pre-script: ${{ matrix.pre-script }}
      post-script: ${{ matrix.post-script }}
      package-name: ${{ matrix.package-name }}
      runner-type: macos-m1-stable
      smoke-test-script: ${{ matrix.smoke-test-script }}
      trigger-event: ${{ github.event_name }}
4 changes: 4 additions & 0 deletions .gitmodules
@@ -55,3 +55,7 @@
[submodule "backends/vulkan/third-party/Vulkan-Headers"]
	path = backends/vulkan/third-party/Vulkan-Headers
	url = https://github.com/KhronosGroup/Vulkan-Headers
+[submodule "third-party/lm-evaluation-harness"]
+	path = third-party/lm-evaluation-harness
+	url = https://github.com/EleutherAI/lm-evaluation-harness
+	branch = v0.4.1
6 changes: 5 additions & 1 deletion CMakeLists.txt
@@ -98,7 +98,7 @@ endif()
# data into sections so they can be properly gc'd. -s: strip symbol.
# -fno-exceptions -fno-rtti: disables exceptions and runtime type.
set(CMAKE_CXX_FLAGS_RELEASE
-    "-O2 -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti")
+    "-ffunction-sections -fdata-sections -fno-exceptions -fno-rtti")
if(NOT APPLE)
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
endif()
@@ -340,6 +340,10 @@ if(EXECUTORCH_BUILD_SDK)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
endif()

+if(EXECUTORCH_BUILD_EXTENSION_APPLE)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple)
+endif()
+
if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/data_loader)
endif()
5 changes: 5 additions & 0 deletions backends/apple/coreml/quantizer/coreml_quantizer.py
@@ -0,0 +1,5 @@
# Copyright © 2024 Apple Inc. All rights reserved.
#
# Please refer to the license found in the LICENSE file in the root directory of the source tree.

from coremltools.optimize.torch.quantization._coreml_quantizer import CoreMLQuantizer
107 changes: 107 additions & 0 deletions backends/apple/coreml/test/test_coreml_quantizer.py
@@ -0,0 +1,107 @@
# Copyright © 2024 Apple Inc. All rights reserved.
#
# Please refer to the license found in the LICENSE file in the root directory of the source tree.

import numpy as np
import pytest
from typing import Tuple

import torch
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e, prepare_qat_pt2e

from executorch.backends.apple.coreml.quantizer.coreml_quantizer import CoreMLQuantizer

from coremltools.optimize.torch.quantization.quantization_config import (
    LinearQuantizerConfig,
    QuantizationScheme,
)


class TestCoreMLQuantizer:
    @staticmethod
    def quantize_and_compare(
        model,
        example_inputs: Tuple[torch.Tensor],
        quantization_type: str,
    ) -> None:
        assert quantization_type in {"PTQ", "QAT"}

        pre_autograd_aten_dialect = capture_pre_autograd_graph(model, example_inputs)

        quantization_config = LinearQuantizerConfig.from_dict(
            {
                "global_config": {
                    "quantization_scheme": QuantizationScheme.symmetric,
                    "milestones": [0, 0, 10, 10],
                    "activation_dtype": torch.quint8,
                    "weight_dtype": torch.qint8,
                    "weight_per_channel": True,
                }
            }
        )
        quantizer = CoreMLQuantizer(quantization_config)

        if quantization_type == "PTQ":
            prepared_graph = prepare_pt2e(pre_autograd_aten_dialect, quantizer)
        elif quantization_type == "QAT":
            prepared_graph = prepare_qat_pt2e(pre_autograd_aten_dialect, quantizer)

        prepared_graph(*example_inputs)
        converted_graph = convert_pt2e(prepared_graph)

        model_output = model(*example_inputs).detach().numpy()
        quantized_output = converted_graph(*example_inputs).detach().numpy()
        np.testing.assert_allclose(quantized_output, model_output, rtol=5e-2, atol=5e-2)

    @pytest.mark.parametrize("quantization_type", ("PTQ", "QAT"))
    def test_conv_relu(self, quantization_type):
        SHAPE = (1, 3, 256, 256)

        class Model(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.conv = torch.nn.Conv2d(
                    in_channels=3, out_channels=16, kernel_size=3, padding=1
                )
                self.relu = torch.nn.ReLU()

            def forward(self, x: torch.Tensor) -> torch.Tensor:
                a = self.conv(x)
                return self.relu(a)

        model = Model()

        example_inputs = (torch.randn(SHAPE),)
        self.quantize_and_compare(
            model,
            example_inputs,
            quantization_type,
        )

    @pytest.mark.parametrize("quantization_type", ("PTQ", "QAT"))
    def test_linear(self, quantization_type):
        SHAPE = (1, 5)

        class Model(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.linear = torch.nn.Linear(5, 10)

            def forward(self, x: torch.Tensor) -> torch.Tensor:
                return self.linear(x)

        model = Model()

        example_inputs = (torch.randn(SHAPE),)
        self.quantize_and_compare(
            model,
            example_inputs,
            quantization_type,
        )


if __name__ == "__main__":
    test_runner = TestCoreMLQuantizer()
    test_runner.test_conv_relu("PTQ")
    test_runner.test_linear("QAT")
10 changes: 9 additions & 1 deletion backends/arm/operators/op_addmm.py
@@ -65,7 +65,15 @@ def define_node(
        stride_attr = [1, 1]
        dilation_attr = [1, 1]

-       input_zp = -128 if is_quant_node else 0
+       input_zp = 0
+       if is_quant_node:
+           input_node = node.all_input_nodes[1]
+           # rank > 2 linear layer
+           if input_node.target == exir_ops.edge.aten.view_copy.default:
+               quant_node = input_node.all_input_nodes[0]
+           else:
+               quant_node = input_node
+           input_zp = get_quant_node_args(quant_node)[1]
        attr.ConvAttribute(
            pad=pad_attr,
            stride=stride_attr,
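The fix above replaces a hard-coded zero point of -128 with the value recorded on the producing quantize node, looking through a view_copy for rank > 2 linear layers. A small sketch (illustrative, not the backend code) of why the zero point matters in affine quantization:

```python
# Sketch: real = scale * (q - zp), so using the wrong input zero point shifts
# every dequantized value; -128 is only correct for one particular mapping.
import numpy as np

def dequantize(q: np.ndarray, scale: float, zp: int) -> np.ndarray:
    return scale * (q.astype(np.int32) - zp)

q = np.array([-128, 0, 127], dtype=np.int8)
print(dequantize(q, scale=0.05, zp=-128))  # [ 0.    6.4  12.75]
print(dequantize(q, scale=0.05, zp=0))     # [-6.4   0.    6.35]
```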
19 changes: 13 additions & 6 deletions backends/arm/operators/op_placeholder.py
@@ -38,9 +38,14 @@ def process_placeholder(
    if consumer_node.target in dq_q_ops:
        _, weight_node_scale, weight_node_zp, _, _, _ = getNodeArgs(consumer_node)

+       int8_max = np.iinfo(np.int8).max
+       int8_min = np.iinfo(np.int8).min
        parameter_values_quantized = (
-           (parameter_values / weight_node_scale.number) + weight_node_zp.number
-       ).astype(np.int8)
+           ((parameter_values / weight_node_scale.number) + weight_node_zp.number)
+           .round()
+           .clip(int8_min, int8_max)
+           .astype(np.int8)
+       )
        tosa_graph.addConst(
            inputs[0].shape,
            ts.DType.INT8,
@@ -63,8 +68,10 @@
        weight_node_scale, weight_node_zp = get_quant_node_args(weight_node)

        bias_values_quantized = (
-           parameter_values / (input_node_scale * weight_node_scale)
-       ).astype(np.int32)
+           (parameter_values / (input_node_scale * weight_node_scale))
+           .round()
+           .astype(np.int32)
+       )

        tosa_graph.addConst(
            inputs[0].shape,
@@ -86,8 +93,8 @@
        weight_node_scale, _ = get_quant_node_args(weight_node)

        bias_scales = input_node_scale * weight_node_scale
-       parameter_values_quantized = (parameter_values / bias_scales).astype(
-           np.int32
+       parameter_values_quantized = (
+           (parameter_values / bias_scales).round().astype(np.int32)
        )

        tosa_graph.addConst(
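The common thread in these three fixes is that a bare astype truncates toward zero and can wrap out-of-range values, while quantized parameters should be rounded to nearest and saturated. An illustrative sketch (not the backend code):

```python
# Sketch: bare astype(np.int8) truncates and, on typical platforms, wraps
# out-of-range floats; round-then-clip saturates to the int8 range instead.
import numpy as np

vals = np.array([1.7, -1.7, 300.0, -300.0])
naive = vals.astype(np.int8)                          # e.g. [  1  -1  44 -44]
fixed = vals.round().clip(-128, 127).astype(np.int8)  # [  2  -2 127 -128]
print(naive, fixed)
```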
7 changes: 2 additions & 5 deletions backends/arm/test/arm_tosa_reference.py
@@ -41,8 +41,6 @@
SUPPORTED_BI_TEST_LIST = [
    "simple_add",
    "simple_add_broadcast",
-   "simple_linear",
-   "simple_linear_rank4",
    "simple_conv2d_3x3_1x3x256x256_stride1",
    "simple_conv2d_1x1_1x2x128x128_stride1",
    "simple_conv2d_2x2_1x1x14x14_stride2",
@@ -250,9 +248,8 @@ def tosa_run_test(op, profile=TosaProfile.MI):  # noqa: C901
    # Need to dequant back to FP32 for running comparison with Torch output
    if profile is TosaProfile.BI:
        tosa_output = (
-           np.round(tosa_output - output_quantization_zp)
-           * output_quantization_scale
-       )
+           tosa_output - output_quantization_zp
+       ) * output_quantization_scale

    ## Read the Torch Output
    torch_file = open(TORCH_OUT_PATH + "/torch_output.npy", "rb")
6 changes: 3 additions & 3 deletions backends/arm/test/models/test_mobilenet_v2_arm.py
@@ -40,8 +40,8 @@ def test_mv2_tosa_MI(self):
                backend=ArmBackendSelector.TOSA,
            )
            .export()
-           .check(list(self.all_operators))
            .to_edge()
+           .check(list(self.all_operators))
            .partition()
            .to_executorch()
            .run_method()
@@ -59,8 +59,8 @@ def test_mv2_tosa_BI(self):
            )
            .quantize()
            .export()
-           .check(list(self.all_operators))
            .to_edge()
+           .check(list(self.all_operators))
            .partition()
            .to_executorch()
            .run_method()
@@ -78,8 +78,8 @@ def test_mv2_u55_BI(self):
            )
            .quantize()
            .export()
-           .check(list(self.all_operators))
            .to_edge()
+           .check(list(self.all_operators))
            .partition()
            .to_executorch()
        )