Skip to content

Commit

Permalink
2024-03-16 nightly release (39c93aa)
Browse files Browse the repository at this point in the history
  • Loading branch information
pytorchbot committed Mar 16, 2024
1 parent c005dd2 commit a04dd04
Show file tree
Hide file tree
Showing 15 changed files with 295 additions and 614 deletions.
51 changes: 49 additions & 2 deletions .github/workflows/apple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
pull_request:
paths:
- .ci/docker/**
- .github/workflows/app-build.yml
- .github/workflows/apple.yml
- install_requirements.sh
- backends/apple/**
- build/build_apple_frameworks.sh
Expand Down Expand Up @@ -58,7 +58,7 @@ jobs:
python-version: '3.11'
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
upload-artifact: executorch.zip
upload-artifact: executorch-frameworks-ios
timeout: 90
script: |
WORKSPACE=$(pwd)
Expand Down Expand Up @@ -90,3 +90,50 @@ jobs:
zip -r "${RUNNER_TEMP}/artifacts/${OUTPUT}.zip" "${OUTPUT}"
popd
upload-frameworks-ios:
runs-on: ubuntu-22.04
needs: build-frameworks-ios
timeout-minutes: 30
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.11'
cache: pip
- name: configure aws credentials
uses: aws-actions/configure-aws-credentials@v1.7.0
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-ios
aws-region: us-east-1
- name: Download the artifact
uses: actions/download-artifact@v3
with:
# NB: The name here needs to match the upload-artifact name from build-frameworks-ios job
name: executorch-frameworks-ios
path: ${{ runner.temp }}/frameworks-ios/
- name: Only push to S3 from main branch
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
shell: bash
run: |
set -eux
echo "UPLOAD_ON_MAIN=1" >> "${GITHUB_ENV}"
- name: Upload the artifact to ossci-ios S3 bucket
shell: bash
run: |
set -eux
pip install awscli==1.32.18
AWS_CMD="aws s3 cp --dryrun"
if [[ "${UPLOAD_ON_MAIN:-0}" == "1" ]]; then
AWS_CMD="aws s3 cp"
fi
for FILENAME in "${RUNNER_TEMP}"/frameworks-ios/*.zip; do
[ -e "${FILENAME}" ] || continue
${AWS_CMD} "${FILENAME}" s3://ossci-ios/executorch/ --acl public-read
done
Original file line number Diff line number Diff line change
Expand Up @@ -28,35 +28,3 @@ binary_op:
OPERATOR: pow(X, Y)
- NAME: binary_floor_divide
OPERATOR: floor(X / Y)

image_to_nchw:
parameter_names_with_default_values:
NDIM: 3
DTYPE: float
PACKING: CHANNELS_PACKED
generate_variant_forall:
DTYPE:
- VALUE: "half"
SUFFIX: "half"
- VALUE: "float"
SUFFIX: "float"
shader_variants:
- NAME: image3d_to_nchw_C_packed
- NAME: image2d_to_nchw_C_packed
NDIM: 2

nchw_to_image:
parameter_names_with_default_values:
NDIM: 3
DTYPE: float
PACKING: CHANNELS_PACKED
generate_variant_forall:
DTYPE:
- VALUE: "half"
SUFFIX: "half"
- VALUE: "float"
SUFFIX: "float"
shader_variants:
- NAME: nchw_to_image3d_C_packed
- NAME: nchw_to_image2d_C_packed
NDIM: 2
21 changes: 21 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Codegen spec for the image->NCHW copy shaders: each NAME under
# shader_variants yields one GLSL variant, expanded once per DTYPE entry
# under generate_variant_forall (SUFFIX is appended to the variant name).
# NOTE(review): indentation was lost in this capture — restore standard
# two-space YAML nesting before use.
image_to_nchw:
parameter_names_with_default_values:
NDIM: 3
DTYPE: float
PACKING: CHANNELS_PACKED
generate_variant_forall:
DTYPE:
- VALUE: "half"
SUFFIX: "half"
- VALUE: "float"
SUFFIX: "float"
shader_variants:
- NAME: image3d_to_nchw_C_packed
# 2D variant overrides the default NDIM of 3.
- NAME: image2d_to_nchw_C_packed
NDIM: 2
21 changes: 21 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Codegen spec for the NCHW->image copy shaders (inverse direction of
# image_to_nchw): each NAME under shader_variants yields one GLSL variant,
# expanded once per DTYPE entry under generate_variant_forall.
# NOTE(review): indentation was lost in this capture — restore standard
# two-space YAML nesting before use.
nchw_to_image:
parameter_names_with_default_values:
NDIM: 3
DTYPE: float
PACKING: CHANNELS_PACKED
generate_variant_forall:
DTYPE:
- VALUE: "half"
SUFFIX: "half"
- VALUE: "float"
SUFFIX: "float"
shader_variants:
- NAME: nchw_to_image3d_C_packed
# 2D variant overrides the default NDIM of 3.
- NAME: nchw_to_image2d_C_packed
NDIM: 2
6 changes: 5 additions & 1 deletion backends/xnnpack/partition/graphs/bilinear_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import executorch.exir as exir
import torch

from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config


@lru_cache(maxsize=None)
def _get_bilinear_2d_graphs():
Expand Down Expand Up @@ -37,7 +39,9 @@ def forward(self, x):
for config in capture_configs:
edge = exir.capture(
bilinear2d(align_corners), sample_inputs, config
).to_edge()
).to_edge(
config=get_xnnpack_edge_compile_config(),
)
_bilinear2d_graphs[edge.exported_program.graph_module] = align_corners
return _bilinear2d_graphs

Expand Down
4 changes: 3 additions & 1 deletion backends/xnnpack/partition/graphs/sdpa.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import List, Optional

import torch
from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
from executorch.exir import to_edge
from torch import Tensor
from torch.export import export
Expand Down Expand Up @@ -75,7 +76,8 @@ def forward(
v,
mask,
),
)
),
compile_config=get_xnnpack_edge_compile_config(),
)
gm = edge.exported_program().graph_module
graphs.append(gm)
Expand Down
20 changes: 17 additions & 3 deletions build/install_flatc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,23 @@ readonly NC="\033[0m" # No Color

# Prints the flatbuffers version of the git submodule.
print_flatbuffers_version(){
pushd "${FLATBUFFERS_PATH}" > /dev/null
git describe --tags "$(git rev-list --tags --max-count=1)" | sed 's/^v//'
popd > /dev/null
local version_file="${FLATBUFFERS_PATH}/package.json"
local version
# Extract the version from the first line like `"version": "23.5.26",`
# First remove the final double quote, then remove everything
# before the now-final double quote.
version="$(
grep '"version"\s*:' "${version_file}" \
| head -1 \
| sed -e 's/"[^"]*$//' \
| sed -e 's/.*"//'
)"
if [[ ${version} =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "${version}"
else
echo "ERROR: Bad version '${version}'; could not find version in ${version_file}" >&2
exit 1
fi
}

main() {
Expand Down
4 changes: 0 additions & 4 deletions examples/models/llama2/custom_ops/op_sdpa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,6 @@ inline void fill_stub(scalar_t* data, scalar_t val, int64_t size) {
for (; d < size - (size % Vec::size()); d += Vec::size()) {
data_vec.store(data + d);
}
#if !defined(_MSC_VER) && !defined(COMPILING_FOR_MIN_SIZE) && \
!defined(__ANDROID__)
#pragma unroll
#endif
for (; d < size; d++) {
data[d] = val;
}
Expand Down
7 changes: 1 addition & 6 deletions examples/models/llama2/install_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,8 @@
pip install snakeviz sentencepiece
pip install torchao-nightly

# Install datasets for HuggingFace dataloader
# v2.14.0 is intentional to force lm-eval v0.3.0 compatibility
pip install datasets==2.14.0

# Install lm-eval for Model Evaluation with lm-evalution-harness
# v0.3.0 is intentional
pip install lm-eval==0.3.0
pip install lm-eval

# Call the install helper for further setup
python examples/models/llama2/install_requirement_helper.py
94 changes: 51 additions & 43 deletions examples/models/llama2/ops/quantized_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Optional

import torch
from torch.library import impl, impl_abstract

Expand Down Expand Up @@ -62,43 +64,45 @@ def embedding_byte_weight_checks(weight, weight_scales, weight_zero_points):
assert weight_zero_points is None or weight_zero_points.size(0) == weight.size(
0
), f"Expecting weight_zero_points tensor to be None or have same number of rows as weights, but found {weight.size()} and {weight_zero_points.size()}"
if not weight_zero_points:
weight_zero_points = torch.zeros(weight.size(0))


@impl(quantized_lib, "embedding_byte", "CompositeExplicitAutograd")
def embedding_byte_meta(
weight,
weight_scales,
weight_zero_points,
weight_quant_min,
weight_quant_max,
indices,
):
def embedding_byte(
weight: torch.Tensor,
weight_scales: torch.Tensor,
weight_zero_points: Optional[torch.Tensor],
weight_quant_min: int,
weight_quant_max: int,
indices: torch.Tensor,
) -> torch.Tensor:
embedding_byte_weight_checks(weight, weight_scales, weight_zero_points)
weight = torch.ops.quantized_decomposed.dequantize_per_channel.default(
group_size = weight.size(1) // (
weight_scales.size(1) if weight_scales.dim() == 2 else 1
)
weight = torch.ops.quantized_decomposed.dequantize_per_channel_group.default(
weight,
weight_scales,
weight_zero_points,
0,
weight_quant_min,
weight_quant_max,
weight.dtype,
group_size,
weight_scales.dtype,
)
return torch.ops.aten.embedding.default(weight, indices)


@impl_abstract("llama_quantized::embedding_byte.out")
def embedding_byte_out_meta(
weight,
weight_scales,
weight_zero_points,
weight_quant_min,
weight_quant_max,
indices,
out,
):
return embedding_byte_meta(
weight: torch.Tensor,
weight_scales: torch.Tensor,
weight_zero_points: Optional[torch.Tensor],
weight_quant_min: int,
weight_quant_max: int,
indices: torch.Tensor,
out: torch.Tensor,
) -> torch.Tensor:
return embedding_byte(
weight,
weight_scales,
weight_zero_points,
Expand All @@ -109,42 +113,46 @@ def embedding_byte_out_meta(


@impl(quantized_lib, "embedding_byte.dtype", "CompositeExplicitAutograd")
def embedding_byte_dtype_meta(
weight,
weight_scales,
weight_zero_points,
weight_quant_min,
weight_quant_max,
indices,
def embedding_byte_dtype(
weight: torch.Tensor,
weight_scales: torch.Tensor,
weight_zero_points: Optional[torch.Tensor],
weight_quant_min: int,
weight_quant_max: int,
indices: torch.Tensor,
*,
dtype,
):
dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
embedding_byte_weight_checks(weight, weight_scales, weight_zero_points)
weight = torch.ops.quantized_decomposed.dequantize_per_channel.default(
group_size = weight.size(1) // (
weight_scales.size(1) if weight_scales.dim() == 2 else 1
)
weight = torch.ops.quantized_decomposed.dequantize_per_channel_group.default(
weight,
weight_scales,
weight_zero_points,
0,
weight_quant_min,
weight_quant_max,
weight.dtype,
group_size,
dtype,
)
return torch.ops.aten.embedding.default(weight, indices).to(dtype)
return torch.ops.aten.embedding.default(weight, indices)


@impl_abstract("llama_quantized::embedding_byte.dtype_out")
def embedding_byte_dtype_out_meta(
weight,
weight_scales,
weight_zero_points,
weight_quant_min,
weight_quant_max,
indices,
weight: torch.Tensor,
weight_scales: torch.Tensor,
weight_zero_points: Optional[torch.Tensor],
weight_quant_min: int,
weight_quant_max: int,
indices: torch.Tensor,
*,
dtype,
out,
):
return embedding_byte_dtype_meta(
dtype: Optional[torch.dtype] = None,
out: torch.Tensor,
) -> torch.Tensor:
return embedding_byte_dtype(
weight,
weight_scales,
weight_zero_points,
Expand Down
Loading

0 comments on commit a04dd04

Please sign in to comment.